davidkim205 commited on
Commit
1b0c644
·
1 Parent(s): 3ac4c37

Fixed issue with charts loading slowly.

Browse files
Files changed (1) hide show
  1. app.py +54 -49
app.py CHANGED
@@ -154,6 +154,7 @@ df_full_rs = df_full_rs.apply(add_link, axis=1)
154
 
155
  df_full_rs = df_full_rs.drop(columns=['Ko-Bench', 'link', 'organization'])
156
 
 
157
  # dataframe
158
  df_rs['MT-Bench'] = '' # MT-Bench 열 추가
159
  df_rs['MT-Bench'] = df_rs['model'].apply(get_mt_bench)
@@ -285,10 +286,10 @@ def search_openai_plot(dropdown_model): # openai plot 함수 정의
285
  openai_turn2 = df.loc[condition4, 'Coding':'Writing'].values.tolist()
286
 
287
  category_labels = []
288
- category_labels.append(openai_top_model + " /Turn 1")
289
- category_labels.append(openai_top_model + " /Turn 2")
290
- category_labels.append(dropdown_model + " /Turn 1")
291
- category_labels.append(dropdown_model + " /Turn 2")
292
 
293
  fig = radar_chart(CATEGORIES, top1_openai_turn1, top1_openai_turn2, openai_turn1, openai_turn2, category_labels,"openai")
294
  return fig
@@ -310,10 +311,10 @@ def search_keval_plot(dropdown_model): # keval plot 함수 정의
310
  keval_turn2 = df.loc[condition4, 'Coding':'Writing'].values.tolist()
311
 
312
  category_labels = []
313
- category_labels.append(keval_top_model + " /Turn 1")
314
- category_labels.append(keval_top_model + " /Turn 2")
315
- category_labels.append(dropdown_model + " /Turn 1")
316
- category_labels.append(dropdown_model + " /Turn 2")
317
 
318
  fig = radar_chart(CATEGORIES, top1_keval_turn1, top1_keval_turn2, keval_turn1, keval_turn2, category_labels, "keval")
319
  return fig
@@ -327,13 +328,13 @@ def plot_average():
327
 
328
  # gpt-4o
329
  fig.add_trace(go.Scatter(x=turn_df['model'], y=turn_df['Ko-Bench/openai'], mode='lines+markers',
330
- name=f'gpt-4o(Average)',
331
  line=dict(color=colors[0][0], dash='dash'),
332
  marker=dict(symbol='x', size=10)))
333
 
334
  # keval
335
  fig.add_trace(go.Scatter(x=turn_df['model'], y=turn_df['Ko-Bench/keval'], mode='lines+markers',
336
- name=f'keval(Average)',
337
  line=dict(color=colors[0][1]),
338
  marker=dict(symbol='circle', size=10)))
339
 
@@ -352,44 +353,48 @@ def plot_average():
352
 
353
  #gradio
354
  with gr.Blocks(css='assets/leaderboard.css') as demo:
355
- gr.Markdown("")
356
- gr.Markdown("# πŸ† Ko-Bench Leaderboard")
357
- gr.Markdown("")
358
- gr.Markdown("#### The Ko-Bench is a leaderboard for evaluating the multi-level conversation ability and instruction-following ability of Korean Large Language Models (LLMs).")
359
- gr.Markdown("- MT-Bench: a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.")
360
- gr.Markdown("- Ko-Bench/openai: a set of challenging multi-turn questions in Korean. We use GPT-4o to grade the model responses.")
361
- gr.Markdown("- Ko-Bench/keval: a set of challenging multi-turn questions in Korean. We use the keval model as an evaluation model.")
362
- gr.Markdown("")
363
- gr.Markdown("github : https://github.com/davidkim205/Ko-Bench")
364
- gr.Markdown("keval : https://huggingface.co/collections/davidkim205/k-eval-6660063dd66e21cbdcc4fbf1")
365
- gr.Markdown("")
366
-
367
- with gr.Row():
368
- with gr.TabItem("Ko-Bench"):
369
- gr.Dataframe(value=df_full_rs,
370
- datatype=['html' if col == 'model' else 'markdown' for col in df_full_rs.columns])
371
- with gr.Row():
372
- with gr.TabItem("Average"):
373
- gr.Plot(plot_average)
374
- with gr.TabItem("Openai Judgment"):
375
- gr.Dataframe(value=df_openai,
376
- datatype=['html' if col == 'model' else 'markdown' for col in df_openai.columns])
377
- with gr.TabItem("Keval Judgment"):
378
- gr.Dataframe(value=df_keval,
379
- datatype=['html' if col == 'model' else 'markdown' for col in df_keval.columns])
380
- with gr.TabItem("Model Detail View"):
381
- with gr.Blocks():
382
- with gr.Row():
383
- dropdown = gr.Dropdown(choices=plot_models_list, label="Choose a Model")
384
  with gr.Row():
385
- dataframe = gr.Dataframe(label="Model Detail View")
386
- dropdown.change(fn=search_dataframe, inputs=dropdown, outputs=dataframe)
387
  with gr.Row():
388
- plot_openai = gr.Plot(label="Openai Plot")
389
- dropdown.change(fn=search_openai_plot, inputs=dropdown, outputs=plot_openai)
390
- plot_keval = gr.Plot(label="Keval Plot")
391
- dropdown.change(fn=search_keval_plot, inputs=dropdown, outputs=plot_keval)
392
-
393
-
394
-
395
- demo.launch(share=True, server_name="0.0.0.0", debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
  df_full_rs = df_full_rs.drop(columns=['Ko-Bench', 'link', 'organization'])
156
 
157
+
158
  # dataframe
159
  df_rs['MT-Bench'] = '' # MT-Bench 열 추가
160
  df_rs['MT-Bench'] = df_rs['model'].apply(get_mt_bench)
 
286
  openai_turn2 = df.loc[condition4, 'Coding':'Writing'].values.tolist()
287
 
288
  category_labels = []
289
+ category_labels.append(openai_top_model + " (Turn 1)")
290
+ category_labels.append(openai_top_model + " (Turn 2)")
291
+ category_labels.append(dropdown_model + " (Turn 1)")
292
+ category_labels.append(dropdown_model + " (Turn 2)")
293
 
294
  fig = radar_chart(CATEGORIES, top1_openai_turn1, top1_openai_turn2, openai_turn1, openai_turn2, category_labels,"openai")
295
  return fig
 
311
  keval_turn2 = df.loc[condition4, 'Coding':'Writing'].values.tolist()
312
 
313
  category_labels = []
314
+ category_labels.append(keval_top_model + " (Turn 1)")
315
+ category_labels.append(keval_top_model + " (Turn 2)")
316
+ category_labels.append(dropdown_model + " (Turn 1)")
317
+ category_labels.append(dropdown_model + " (Turn 2)")
318
 
319
  fig = radar_chart(CATEGORIES, top1_keval_turn1, top1_keval_turn2, keval_turn1, keval_turn2, category_labels, "keval")
320
  return fig
 
328
 
329
  # gpt-4o
330
  fig.add_trace(go.Scatter(x=turn_df['model'], y=turn_df['Ko-Bench/openai'], mode='lines+markers',
331
+ name=f'gpt-4o (Average)',
332
  line=dict(color=colors[0][0], dash='dash'),
333
  marker=dict(symbol='x', size=10)))
334
 
335
  # keval
336
  fig.add_trace(go.Scatter(x=turn_df['model'], y=turn_df['Ko-Bench/keval'], mode='lines+markers',
337
+ name=f'keval (Average)',
338
  line=dict(color=colors[0][1]),
339
  marker=dict(symbol='circle', size=10)))
340
 
 
353
 
354
  #gradio
355
  with gr.Blocks(css='assets/leaderboard.css') as demo:
356
+ with gr.Blocks():
357
+ gr.Markdown("")
358
+ gr.Markdown("# πŸ† Ko-Bench Leaderboard")
359
+ gr.Markdown("")
360
+ gr.Markdown(
361
+ "#### The Ko-Bench is a leaderboard for evaluating the multi-level conversation ability and instruction-following ability of Korean Large Language Models (LLMs).")
362
+ gr.Markdown("- MT-Bench: a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.")
363
+ gr.Markdown(
364
+ "- Ko-Bench/openai: a set of challenging multi-turn questions in Korean. We use GPT-4o to grade the model responses.")
365
+ gr.Markdown(
366
+ "- Ko-Bench/keval: a set of challenging multi-turn questions in Korean. We use the keval model as an evaluation model.")
367
+ gr.Markdown("")
368
+ gr.Markdown("github : https://github.com/davidkim205/Ko-Bench")
369
+ gr.Markdown("keval : https://huggingface.co/collections/davidkim205/k-eval-6660063dd66e21cbdcc4fbf1")
370
+ gr.Markdown("")
371
+
372
+ with gr.Blocks():
373
+ with gr.Row():
374
+ with gr.TabItem("Ko-Bench"):
 
 
 
 
 
 
 
 
 
 
375
  with gr.Row():
376
+ gr.Dataframe(value=df_full_rs,
377
+ datatype=['html' if col == 'model' else 'markdown' for col in df_full_rs.columns])
378
  with gr.Row():
379
+ avg = plot_average()
380
+ gr.Plot(avg)
381
+ with gr.TabItem("Openai Judgment"):
382
+ gr.Dataframe(value=df_openai,
383
+ datatype=['html' if col == 'model' else 'markdown' for col in df_openai.columns])
384
+ with gr.TabItem("Keval Judgment"):
385
+ gr.Dataframe(value=df_keval,
386
+ datatype=['html' if col == 'model' else 'markdown' for col in df_keval.columns])
387
+ with gr.TabItem("Model Detail View"):
388
+ with gr.Blocks():
389
+ with gr.Row():
390
+ dropdown = gr.Dropdown(choices=plot_models_list, label="Choose a Model")
391
+ with gr.Row():
392
+ dataframe = gr.Dataframe(label="Model Detail View")
393
+ dropdown.change(fn=search_dataframe, inputs=dropdown, outputs=dataframe)
394
+ with gr.Row():
395
+ plot_openai = gr.Plot(label="Openai Plot")
396
+ dropdown.change(fn=search_openai_plot, inputs=dropdown, outputs=plot_openai)
397
+ plot_keval = gr.Plot(label="Keval Plot")
398
+ dropdown.change(fn=search_keval_plot, inputs=dropdown, outputs=plot_keval)
399
+
400
+ demo.launch(share=True, server_name="0.0.0.0", server_port=7860, debug=True)