Jellyfish042 committed on
Commit
1b6a46e
·
1 Parent(s): ceed5d4
.gitignore CHANGED
@@ -1,7 +1,7 @@
1
  /.env
2
  /.idea
3
  /.logs
4
- /.__pycache__
5
  /.temp
6
  .claude/
7
  CLAUDE.md
 
1
  /.env
2
  /.idea
3
  /.logs
4
+ __pycache__/
5
  /.temp
6
  .claude/
7
  CLAUDE.md
__pycache__/data_manager.cpython-311.pyc DELETED
Binary file (20.9 kB)
 
__pycache__/longctx_utils.cpython-311.pyc DELETED
Binary file (9.9 kB)
 
__pycache__/title.cpython-311.pyc DELETED
Binary file (1.77 kB)
 
app.py CHANGED
@@ -565,14 +565,12 @@ if __name__ == "__main__":
565
  color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
566
  with gr.Column():
567
  # Data Source 分组定义
568
- code_cols = ["github cpp", "github python", "github javascript"]
569
- research_cols = ["arxiv physics", "arxiv cs", "arxiv math"]
570
- writing_cols = ["ao3 english", "github markdown"]
571
- knowledge_cols = ["bbc news", "wikipedia english"]
572
 
573
  initial_code = [c for c in code_cols if c in initial_columns]
574
- initial_research = [c for c in research_cols if c in initial_columns]
575
- initial_writing = [c for c in writing_cols if c in initial_columns]
576
  initial_knowledge = [c for c in knowledge_cols if c in initial_columns]
577
 
578
  with gr.Column(elem_classes=["data-source-box"]):
@@ -585,68 +583,56 @@ if __name__ == "__main__":
585
  choices=initial_code, value=initial_code, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
586
  )
587
 
588
- # 科研 (Research)
589
  with gr.Row():
590
- toggle_research = gr.Checkbox(label="๐Ÿ”ฌ Science", value=True, scale=0, min_width=150)
591
- colfilter_research = gr.CheckboxGroup(
592
- choices=initial_research, value=initial_research, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
593
  )
594
 
595
- # 世界知识 (World Knowledge)
596
  with gr.Row():
597
  toggle_knowledge = gr.Checkbox(label="๐Ÿ“– Knowledge", value=True, scale=0, min_width=150)
598
  colfilter_knowledge = gr.CheckboxGroup(
599
  choices=initial_knowledge, value=initial_knowledge, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
600
  )
601
 
602
- # 写作 (Writing)
603
- with gr.Row():
604
- toggle_writing = gr.Checkbox(label="โœ๏ธ Writing", value=True, scale=0, min_width=150)
605
- colfilter_writing = gr.CheckboxGroup(
606
- choices=initial_writing, value=initial_writing, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
607
- )
608
-
609
- # 多语言 (Multilingual) - Coming Soon
610
- with gr.Row():
611
- gr.Checkbox(label="๐ŸŒ Multilingual (Coming Soon)", value=False, interactive=False, scale=0, min_width=250)
612
 
613
  table = gr.HTML(initial_data)
614
 
615
  def update_table_wrapper(
616
- period, models_size, metric, code_sel, research_sel, writing_sel, knowledge_sel, color_columns, size_range, midpoint
617
  ):
618
- visible_columns = code_sel + research_sel + writing_sel + knowledge_sel
619
  return update_table(data_manager, period, models_size, metric, visible_columns, color_columns, size_range, midpoint)
620
 
621
- def update_column_choices(period, cur_code, cur_research, cur_writing, cur_knowledge):
622
  if not period:
623
  empty = gr.update(choices=[], value=[])
624
- return empty, empty, empty, empty
625
  columns = data_manager.get_available_columns(period)
626
 
627
  new_code = [c for c in code_cols if c in columns]
628
- new_research = [c for c in research_cols if c in columns]
629
- new_writing = [c for c in writing_cols if c in columns]
630
  new_knowledge = [c for c in knowledge_cols if c in columns]
631
 
632
  sel_code = [c for c in cur_code if c in new_code] if cur_code else new_code
633
- sel_research = [c for c in cur_research if c in new_research] if cur_research else new_research
634
- sel_writing = [c for c in cur_writing if c in new_writing] if cur_writing else new_writing
635
  sel_knowledge = [c for c in cur_knowledge if c in new_knowledge] if cur_knowledge else new_knowledge
636
 
637
  if not sel_code:
638
  sel_code = new_code
639
- if not sel_research:
640
- sel_research = new_research
641
- if not sel_writing:
642
- sel_writing = new_writing
643
  if not sel_knowledge:
644
  sel_knowledge = new_knowledge
645
 
646
  return (
647
  gr.update(choices=new_code, value=sel_code),
648
- gr.update(choices=new_research, value=sel_research),
649
- gr.update(choices=new_writing, value=sel_writing),
650
  gr.update(choices=new_knowledge, value=sel_knowledge),
651
  )
652
 
@@ -656,11 +642,8 @@ if __name__ == "__main__":
656
  return valid_cols if enabled else []
657
 
658
  toggle_code.change(lambda enabled: toggle_group(enabled, code_cols, initial_columns), inputs=[toggle_code], outputs=[colfilter_code])
659
- toggle_research.change(
660
- lambda enabled: toggle_group(enabled, research_cols, initial_columns), inputs=[toggle_research], outputs=[colfilter_research]
661
- )
662
- toggle_writing.change(
663
- lambda enabled: toggle_group(enabled, writing_cols, initial_columns), inputs=[toggle_writing], outputs=[colfilter_writing]
664
  )
665
  toggle_knowledge.change(
666
  lambda enabled: toggle_group(enabled, knowledge_cols, initial_columns), inputs=[toggle_knowledge], outputs=[colfilter_knowledge]
@@ -671,8 +654,7 @@ if __name__ == "__main__":
671
  model_selector,
672
  metric_selector,
673
  colfilter_code,
674
- colfilter_research,
675
- colfilter_writing,
676
  colfilter_knowledge,
677
  color_selector,
678
  size_range_slider,
@@ -681,15 +663,14 @@ if __name__ == "__main__":
681
 
682
  period_selector.change(
683
  update_column_choices,
684
- inputs=[period_selector, colfilter_code, colfilter_research, colfilter_writing, colfilter_knowledge],
685
- outputs=[colfilter_code, colfilter_research, colfilter_writing, colfilter_knowledge],
686
  )
687
  period_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
688
  model_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
689
  metric_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
690
  colfilter_code.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
691
- colfilter_research.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
692
- colfilter_writing.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
693
  colfilter_knowledge.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
694
  color_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
695
  size_range_slider.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
@@ -981,15 +962,15 @@ if __name__ == "__main__":
981
  # 数据集列表
982
  all_datasets = [
983
  "github cpp",
984
- "github python",
985
  "github javascript",
 
 
 
986
  "arxiv physics",
987
  "arxiv cs",
988
- "arxiv math",
989
- "ao3 english",
990
- "github markdown",
991
- "bbc news",
992
  "wikipedia english",
 
 
993
  ]
994
  initial_datasets = all_datasets[:4]
995
 
 
565
  color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
566
  with gr.Column():
567
  # Data Source 分组定义
568
+ code_cols = ["github cpp", "github javascript", "github python", "github markdown"]
569
+ science_cols = ["arxiv math", "arxiv physics", "arxiv cs"]
570
+ knowledge_cols = ["wikipedia english", "bbc news", "ao3 english"]
 
571
 
572
  initial_code = [c for c in code_cols if c in initial_columns]
573
+ initial_science = [c for c in science_cols if c in initial_columns]
 
574
  initial_knowledge = [c for c in knowledge_cols if c in initial_columns]
575
 
576
  with gr.Column(elem_classes=["data-source-box"]):
 
583
  choices=initial_code, value=initial_code, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
584
  )
585
 
586
+ # 科学 (Science)
587
  with gr.Row():
588
+ toggle_science = gr.Checkbox(label="๐Ÿ”ฌ Science", value=True, scale=0, min_width=150)
589
+ colfilter_science = gr.CheckboxGroup(
590
+ choices=initial_science, value=initial_science, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
591
  )
592
 
593
+ # 世界知识 (Knowledge)
594
  with gr.Row():
595
  toggle_knowledge = gr.Checkbox(label="๐Ÿ“– Knowledge", value=True, scale=0, min_width=150)
596
  colfilter_knowledge = gr.CheckboxGroup(
597
  choices=initial_knowledge, value=initial_knowledge, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
598
  )
599
 
600
+ # # 多语言 (Multilingual) - Coming Soon
601
+ # with gr.Row():
602
+ # gr.Checkbox(label="๐ŸŒ Multilingual (Coming Soon)", value=False, interactive=False, scale=0, min_width=250)
 
 
 
 
 
 
 
603
 
604
  table = gr.HTML(initial_data)
605
 
606
  def update_table_wrapper(
607
+ period, models_size, metric, code_sel, science_sel, knowledge_sel, color_columns, size_range, midpoint
608
  ):
609
+ visible_columns = code_sel + science_sel + knowledge_sel
610
  return update_table(data_manager, period, models_size, metric, visible_columns, color_columns, size_range, midpoint)
611
 
612
+ def update_column_choices(period, cur_code, cur_science, cur_knowledge):
613
  if not period:
614
  empty = gr.update(choices=[], value=[])
615
+ return empty, empty, empty
616
  columns = data_manager.get_available_columns(period)
617
 
618
  new_code = [c for c in code_cols if c in columns]
619
+ new_science = [c for c in science_cols if c in columns]
 
620
  new_knowledge = [c for c in knowledge_cols if c in columns]
621
 
622
  sel_code = [c for c in cur_code if c in new_code] if cur_code else new_code
623
+ sel_science = [c for c in cur_science if c in new_science] if cur_science else new_science
 
624
  sel_knowledge = [c for c in cur_knowledge if c in new_knowledge] if cur_knowledge else new_knowledge
625
 
626
  if not sel_code:
627
  sel_code = new_code
628
+ if not sel_science:
629
+ sel_science = new_science
 
 
630
  if not sel_knowledge:
631
  sel_knowledge = new_knowledge
632
 
633
  return (
634
  gr.update(choices=new_code, value=sel_code),
635
+ gr.update(choices=new_science, value=sel_science),
 
636
  gr.update(choices=new_knowledge, value=sel_knowledge),
637
  )
638
 
 
642
  return valid_cols if enabled else []
643
 
644
  toggle_code.change(lambda enabled: toggle_group(enabled, code_cols, initial_columns), inputs=[toggle_code], outputs=[colfilter_code])
645
+ toggle_science.change(
646
+ lambda enabled: toggle_group(enabled, science_cols, initial_columns), inputs=[toggle_science], outputs=[colfilter_science]
 
 
 
647
  )
648
  toggle_knowledge.change(
649
  lambda enabled: toggle_group(enabled, knowledge_cols, initial_columns), inputs=[toggle_knowledge], outputs=[colfilter_knowledge]
 
654
  model_selector,
655
  metric_selector,
656
  colfilter_code,
657
+ colfilter_science,
 
658
  colfilter_knowledge,
659
  color_selector,
660
  size_range_slider,
 
663
 
664
  period_selector.change(
665
  update_column_choices,
666
+ inputs=[period_selector, colfilter_code, colfilter_science, colfilter_knowledge],
667
+ outputs=[colfilter_code, colfilter_science, colfilter_knowledge],
668
  )
669
  period_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
670
  model_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
671
  metric_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
672
  colfilter_code.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
673
+ colfilter_science.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
 
674
  colfilter_knowledge.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
675
  color_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
676
  size_range_slider.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
 
962
  # 数据集列表
963
  all_datasets = [
964
  "github cpp",
 
965
  "github javascript",
966
+ "github python",
967
+ "github markdown",
968
+ "arxiv math",
969
  "arxiv physics",
970
  "arxiv cs",
 
 
 
 
971
  "wikipedia english",
972
+ "bbc news",
973
+ "ao3 english",
974
  ]
975
  initial_datasets = all_datasets[:4]
976
 
data_manager.py CHANGED
@@ -246,18 +246,17 @@ class DataManager:
246
  column_priority = [
247
  # 代码 (Code)
248
  "github cpp",
249
- "github python",
250
  "github javascript",
251
- # 科研 (Research)
 
 
 
252
  "arxiv physics",
253
  "arxiv cs",
254
- "arxiv math",
255
- # 世界知识 (World Knowledge)
256
- "bbc news",
257
  "wikipedia english",
258
- # 写作 (Writing)
259
  "ao3 english",
260
- "github markdown",
261
  ]
262
  existing_cols = filtered_df.columns.tolist()
263
  ordered_cols = []
 
246
  column_priority = [
247
  # 代码 (Code)
248
  "github cpp",
 
249
  "github javascript",
250
+ "github python",
251
+ "github markdown",
252
+ # 科学 (Science)
253
+ "arxiv math",
254
  "arxiv physics",
255
  "arxiv cs",
256
+ # 世界知识 (Knowledge)
 
 
257
  "wikipedia english",
258
+ "bbc news",
259
  "ao3 english",
 
260
  ]
261
  existing_cols = filtered_df.columns.tolist()
262
  ordered_cols = []
title.py CHANGED
@@ -27,7 +27,7 @@ table {
27
  }
28
  """
29
  TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">๐Ÿ† LLM Compression Leaderboard (Base Model)</span></h1>'
30
- SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy post-training and cheating won't work ๐Ÿšซ; only compute ๐Ÿ’ป, data ๐Ÿ“Š, and real innovation ๐Ÿ”ฅ can prevail!</span></h1>"
31
  LINKS_HTML = """
32
  <div style="display: flex; flex-direction: row; justify-content: center; align-items: center; gap: 20px; margin: 10px 0;">
33
  <a href="https://github.com/Jellyfish042/uncheatable_eval" target="_blank" style="text-decoration: none;">
 
27
  }
28
  """
29
  TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">๐Ÿ† LLM Compression Leaderboard (Base Model)</span></h1>'
30
+ SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won't work ๐Ÿšซ; only compute ๐Ÿ’ป, data ๐Ÿ“Š, and real innovation ๐Ÿ”ฅ can prevail!</span></h1>"
31
  LINKS_HTML = """
32
  <div style="display: flex; flex-direction: row; justify-content: center; align-items: center; gap: 20px; margin: 10px 0;">
33
  <a href="https://github.com/Jellyfish042/uncheatable_eval" target="_blank" style="text-decoration: none;">