Spaces:
Running
Running
Commit ·
1b6a46e
1
Parent(s): ceed5d4
update
Browse files- .gitignore +1 -1
- __pycache__/data_manager.cpython-311.pyc +0 -0
- __pycache__/longctx_utils.cpython-311.pyc +0 -0
- __pycache__/title.cpython-311.pyc +0 -0
- app.py +32 -51
- data_manager.py +6 -7
- title.py +1 -1
.gitignore
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
/.env
|
| 2 |
/.idea
|
| 3 |
/.logs
|
| 4 |
-
|
| 5 |
/.temp
|
| 6 |
.claude/
|
| 7 |
CLAUDE.md
|
|
|
|
| 1 |
/.env
|
| 2 |
/.idea
|
| 3 |
/.logs
|
| 4 |
+
__pycache__/
|
| 5 |
/.temp
|
| 6 |
.claude/
|
| 7 |
CLAUDE.md
|
__pycache__/data_manager.cpython-311.pyc
DELETED
|
Binary file (20.9 kB)
|
|
|
__pycache__/longctx_utils.cpython-311.pyc
DELETED
|
Binary file (9.9 kB)
|
|
|
__pycache__/title.cpython-311.pyc
DELETED
|
Binary file (1.77 kB)
|
|
|
app.py
CHANGED
|
@@ -565,14 +565,12 @@ if __name__ == "__main__":
|
|
| 565 |
color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
|
| 566 |
with gr.Column():
|
| 567 |
# Data Source 分组定义
|
| 568 |
-
code_cols = ["github cpp", "github python", "github
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
knowledge_cols = ["bbc news", "wikipedia english"]
|
| 572 |
|
| 573 |
initial_code = [c for c in code_cols if c in initial_columns]
|
| 574 |
-
|
| 575 |
-
initial_writing = [c for c in writing_cols if c in initial_columns]
|
| 576 |
initial_knowledge = [c for c in knowledge_cols if c in initial_columns]
|
| 577 |
|
| 578 |
with gr.Column(elem_classes=["data-source-box"]):
|
|
@@ -585,68 +583,56 @@ if __name__ == "__main__":
|
|
| 585 |
choices=initial_code, value=initial_code, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
|
| 586 |
)
|
| 587 |
|
| 588 |
-
# ็ง
|
| 589 |
with gr.Row():
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
choices=
|
| 593 |
)
|
| 594 |
|
| 595 |
-
# ไธ็็ฅ่ฏ (
|
| 596 |
with gr.Row():
|
| 597 |
toggle_knowledge = gr.Checkbox(label="๐ Knowledge", value=True, scale=0, min_width=150)
|
| 598 |
colfilter_knowledge = gr.CheckboxGroup(
|
| 599 |
choices=initial_knowledge, value=initial_knowledge, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
|
| 600 |
)
|
| 601 |
|
| 602 |
-
#
|
| 603 |
-
with gr.Row():
|
| 604 |
-
|
| 605 |
-
colfilter_writing = gr.CheckboxGroup(
|
| 606 |
-
choices=initial_writing, value=initial_writing, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
|
| 607 |
-
)
|
| 608 |
-
|
| 609 |
-
# 多语言 (Multilingual) - Coming Soon
|
| 610 |
-
with gr.Row():
|
| 611 |
-
gr.Checkbox(label="๐ Multilingual (Coming Soon)", value=False, interactive=False, scale=0, min_width=250)
|
| 612 |
|
| 613 |
table = gr.HTML(initial_data)
|
| 614 |
|
| 615 |
def update_table_wrapper(
|
| 616 |
-
period, models_size, metric, code_sel,
|
| 617 |
):
|
| 618 |
-
visible_columns = code_sel +
|
| 619 |
return update_table(data_manager, period, models_size, metric, visible_columns, color_columns, size_range, midpoint)
|
| 620 |
|
| 621 |
-
def update_column_choices(period, cur_code,
|
| 622 |
if not period:
|
| 623 |
empty = gr.update(choices=[], value=[])
|
| 624 |
-
return empty, empty, empty
|
| 625 |
columns = data_manager.get_available_columns(period)
|
| 626 |
|
| 627 |
new_code = [c for c in code_cols if c in columns]
|
| 628 |
-
|
| 629 |
-
new_writing = [c for c in writing_cols if c in columns]
|
| 630 |
new_knowledge = [c for c in knowledge_cols if c in columns]
|
| 631 |
|
| 632 |
sel_code = [c for c in cur_code if c in new_code] if cur_code else new_code
|
| 633 |
-
|
| 634 |
-
sel_writing = [c for c in cur_writing if c in new_writing] if cur_writing else new_writing
|
| 635 |
sel_knowledge = [c for c in cur_knowledge if c in new_knowledge] if cur_knowledge else new_knowledge
|
| 636 |
|
| 637 |
if not sel_code:
|
| 638 |
sel_code = new_code
|
| 639 |
-
if not
|
| 640 |
-
|
| 641 |
-
if not sel_writing:
|
| 642 |
-
sel_writing = new_writing
|
| 643 |
if not sel_knowledge:
|
| 644 |
sel_knowledge = new_knowledge
|
| 645 |
|
| 646 |
return (
|
| 647 |
gr.update(choices=new_code, value=sel_code),
|
| 648 |
-
gr.update(choices=
|
| 649 |
-
gr.update(choices=new_writing, value=sel_writing),
|
| 650 |
gr.update(choices=new_knowledge, value=sel_knowledge),
|
| 651 |
)
|
| 652 |
|
|
@@ -656,11 +642,8 @@ if __name__ == "__main__":
|
|
| 656 |
return valid_cols if enabled else []
|
| 657 |
|
| 658 |
toggle_code.change(lambda enabled: toggle_group(enabled, code_cols, initial_columns), inputs=[toggle_code], outputs=[colfilter_code])
|
| 659 |
-
|
| 660 |
-
lambda enabled: toggle_group(enabled,
|
| 661 |
-
)
|
| 662 |
-
toggle_writing.change(
|
| 663 |
-
lambda enabled: toggle_group(enabled, writing_cols, initial_columns), inputs=[toggle_writing], outputs=[colfilter_writing]
|
| 664 |
)
|
| 665 |
toggle_knowledge.change(
|
| 666 |
lambda enabled: toggle_group(enabled, knowledge_cols, initial_columns), inputs=[toggle_knowledge], outputs=[colfilter_knowledge]
|
|
@@ -671,8 +654,7 @@ if __name__ == "__main__":
|
|
| 671 |
model_selector,
|
| 672 |
metric_selector,
|
| 673 |
colfilter_code,
|
| 674 |
-
|
| 675 |
-
colfilter_writing,
|
| 676 |
colfilter_knowledge,
|
| 677 |
color_selector,
|
| 678 |
size_range_slider,
|
|
@@ -681,15 +663,14 @@ if __name__ == "__main__":
|
|
| 681 |
|
| 682 |
period_selector.change(
|
| 683 |
update_column_choices,
|
| 684 |
-
inputs=[period_selector, colfilter_code,
|
| 685 |
-
outputs=[colfilter_code,
|
| 686 |
)
|
| 687 |
period_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 688 |
model_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 689 |
metric_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 690 |
colfilter_code.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 691 |
-
|
| 692 |
-
colfilter_writing.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 693 |
colfilter_knowledge.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 694 |
color_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 695 |
size_range_slider.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
|
@@ -981,15 +962,15 @@ if __name__ == "__main__":
|
|
| 981 |
# 数据集列表
|
| 982 |
all_datasets = [
|
| 983 |
"github cpp",
|
| 984 |
-
"github python",
|
| 985 |
"github javascript",
|
|
|
|
|
|
|
|
|
|
| 986 |
"arxiv physics",
|
| 987 |
"arxiv cs",
|
| 988 |
-
"arxiv math",
|
| 989 |
-
"ao3 english",
|
| 990 |
-
"github markdown",
|
| 991 |
-
"bbc news",
|
| 992 |
"wikipedia english",
|
|
|
|
|
|
|
| 993 |
]
|
| 994 |
initial_datasets = all_datasets[:4]
|
| 995 |
|
|
|
|
| 565 |
color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
|
| 566 |
with gr.Column():
|
| 567 |
# Data Source 分组定义
|
| 568 |
+
code_cols = ["github cpp", "github javascript", "github python", "github markdown"]
|
| 569 |
+
science_cols = ["arxiv math", "arxiv physics", "arxiv cs"]
|
| 570 |
+
knowledge_cols = ["wikipedia english", "bbc news", "ao3 english"]
|
|
|
|
| 571 |
|
| 572 |
initial_code = [c for c in code_cols if c in initial_columns]
|
| 573 |
+
initial_science = [c for c in science_cols if c in initial_columns]
|
|
|
|
| 574 |
initial_knowledge = [c for c in knowledge_cols if c in initial_columns]
|
| 575 |
|
| 576 |
with gr.Column(elem_classes=["data-source-box"]):
|
|
|
|
| 583 |
choices=initial_code, value=initial_code, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
|
| 584 |
)
|
| 585 |
|
| 586 |
+
# 科学 (Science)
|
| 587 |
with gr.Row():
|
| 588 |
+
toggle_science = gr.Checkbox(label="🔬 Science", value=True, scale=0, min_width=150)
|
| 589 |
+
colfilter_science = gr.CheckboxGroup(
|
| 590 |
+
choices=initial_science, value=initial_science, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
|
| 591 |
)
|
| 592 |
|
| 593 |
+
# 世界知识 (Knowledge)
|
| 594 |
with gr.Row():
|
| 595 |
toggle_knowledge = gr.Checkbox(label="๐ Knowledge", value=True, scale=0, min_width=150)
|
| 596 |
colfilter_knowledge = gr.CheckboxGroup(
|
| 597 |
choices=initial_knowledge, value=initial_knowledge, show_label=False, scale=3, elem_classes=["aligned-checkboxes"]
|
| 598 |
)
|
| 599 |
|
| 600 |
+
# # 多语言 (Multilingual) - Coming Soon
|
| 601 |
+
# with gr.Row():
|
| 602 |
+
# gr.Checkbox(label="๐ Multilingual (Coming Soon)", value=False, interactive=False, scale=0, min_width=250)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 603 |
|
| 604 |
table = gr.HTML(initial_data)
|
| 605 |
|
| 606 |
def update_table_wrapper(
|
| 607 |
+
period, models_size, metric, code_sel, science_sel, knowledge_sel, color_columns, size_range, midpoint
|
| 608 |
):
|
| 609 |
+
visible_columns = code_sel + science_sel + knowledge_sel
|
| 610 |
return update_table(data_manager, period, models_size, metric, visible_columns, color_columns, size_range, midpoint)
|
| 611 |
|
| 612 |
+
def update_column_choices(period, cur_code, cur_science, cur_knowledge):
|
| 613 |
if not period:
|
| 614 |
empty = gr.update(choices=[], value=[])
|
| 615 |
+
return empty, empty, empty
|
| 616 |
columns = data_manager.get_available_columns(period)
|
| 617 |
|
| 618 |
new_code = [c for c in code_cols if c in columns]
|
| 619 |
+
new_science = [c for c in science_cols if c in columns]
|
|
|
|
| 620 |
new_knowledge = [c for c in knowledge_cols if c in columns]
|
| 621 |
|
| 622 |
sel_code = [c for c in cur_code if c in new_code] if cur_code else new_code
|
| 623 |
+
sel_science = [c for c in cur_science if c in new_science] if cur_science else new_science
|
|
|
|
| 624 |
sel_knowledge = [c for c in cur_knowledge if c in new_knowledge] if cur_knowledge else new_knowledge
|
| 625 |
|
| 626 |
if not sel_code:
|
| 627 |
sel_code = new_code
|
| 628 |
+
if not sel_science:
|
| 629 |
+
sel_science = new_science
|
|
|
|
|
|
|
| 630 |
if not sel_knowledge:
|
| 631 |
sel_knowledge = new_knowledge
|
| 632 |
|
| 633 |
return (
|
| 634 |
gr.update(choices=new_code, value=sel_code),
|
| 635 |
+
gr.update(choices=new_science, value=sel_science),
|
|
|
|
| 636 |
gr.update(choices=new_knowledge, value=sel_knowledge),
|
| 637 |
)
|
| 638 |
|
|
|
|
| 642 |
return valid_cols if enabled else []
|
| 643 |
|
| 644 |
toggle_code.change(lambda enabled: toggle_group(enabled, code_cols, initial_columns), inputs=[toggle_code], outputs=[colfilter_code])
|
| 645 |
+
toggle_science.change(
|
| 646 |
+
lambda enabled: toggle_group(enabled, science_cols, initial_columns), inputs=[toggle_science], outputs=[colfilter_science]
|
|
|
|
|
|
|
|
|
|
| 647 |
)
|
| 648 |
toggle_knowledge.change(
|
| 649 |
lambda enabled: toggle_group(enabled, knowledge_cols, initial_columns), inputs=[toggle_knowledge], outputs=[colfilter_knowledge]
|
|
|
|
| 654 |
model_selector,
|
| 655 |
metric_selector,
|
| 656 |
colfilter_code,
|
| 657 |
+
colfilter_science,
|
|
|
|
| 658 |
colfilter_knowledge,
|
| 659 |
color_selector,
|
| 660 |
size_range_slider,
|
|
|
|
| 663 |
|
| 664 |
period_selector.change(
|
| 665 |
update_column_choices,
|
| 666 |
+
inputs=[period_selector, colfilter_code, colfilter_science, colfilter_knowledge],
|
| 667 |
+
outputs=[colfilter_code, colfilter_science, colfilter_knowledge],
|
| 668 |
)
|
| 669 |
period_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 670 |
model_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 671 |
metric_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 672 |
colfilter_code.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 673 |
+
colfilter_science.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
|
|
|
| 674 |
colfilter_knowledge.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 675 |
color_selector.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
| 676 |
size_range_slider.change(update_table_wrapper, inputs=shared_inputs, outputs=table)
|
|
|
|
| 962 |
# 数据集列表
|
| 963 |
all_datasets = [
|
| 964 |
"github cpp",
|
|
|
|
| 965 |
"github javascript",
|
| 966 |
+
"github python",
|
| 967 |
+
"github markdown",
|
| 968 |
+
"arxiv math",
|
| 969 |
"arxiv physics",
|
| 970 |
"arxiv cs",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 971 |
"wikipedia english",
|
| 972 |
+
"bbc news",
|
| 973 |
+
"ao3 english",
|
| 974 |
]
|
| 975 |
initial_datasets = all_datasets[:4]
|
| 976 |
|
data_manager.py
CHANGED
|
@@ -246,18 +246,17 @@ class DataManager:
|
|
| 246 |
column_priority = [
|
| 247 |
# 代码 (Code)
|
| 248 |
"github cpp",
|
| 249 |
-
"github python",
|
| 250 |
"github javascript",
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
| 252 |
"arxiv physics",
|
| 253 |
"arxiv cs",
|
| 254 |
-
|
| 255 |
-
# 世界知识 (World Knowledge)
|
| 256 |
-
"bbc news",
|
| 257 |
"wikipedia english",
|
| 258 |
-
|
| 259 |
"ao3 english",
|
| 260 |
-
"github markdown",
|
| 261 |
]
|
| 262 |
existing_cols = filtered_df.columns.tolist()
|
| 263 |
ordered_cols = []
|
|
|
|
| 246 |
column_priority = [
|
| 247 |
# 代码 (Code)
|
| 248 |
"github cpp",
|
|
|
|
| 249 |
"github javascript",
|
| 250 |
+
"github python",
|
| 251 |
+
"github markdown",
|
| 252 |
+
# 科学 (Science)
|
| 253 |
+
"arxiv math",
|
| 254 |
"arxiv physics",
|
| 255 |
"arxiv cs",
|
| 256 |
+
# 世界知识 (Knowledge)
|
|
|
|
|
|
|
| 257 |
"wikipedia english",
|
| 258 |
+
"bbc news",
|
| 259 |
"ao3 english",
|
|
|
|
| 260 |
]
|
| 261 |
existing_cols = filtered_df.columns.tolist()
|
| 262 |
ordered_cols = []
|
title.py
CHANGED
|
@@ -27,7 +27,7 @@ table {
|
|
| 27 |
}
|
| 28 |
"""
|
| 29 |
TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">๐ LLM Compression Leaderboard (Base Model)</span></h1>'
|
| 30 |
-
SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy
|
| 31 |
LINKS_HTML = """
|
| 32 |
<div style="display: flex; flex-direction: row; justify-content: center; align-items: center; gap: 20px; margin: 10px 0;">
|
| 33 |
<a href="https://github.com/Jellyfish042/uncheatable_eval" target="_blank" style="text-decoration: none;">
|
|
|
|
| 27 |
}
|
| 28 |
"""
|
| 29 |
TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">๐ LLM Compression Leaderboard (Base Model)</span></h1>'
|
| 30 |
+
SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won't work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>"
|
| 31 |
LINKS_HTML = """
|
| 32 |
<div style="display: flex; flex-direction: row; justify-content: center; align-items: center; gap: 20px; margin: 10px 0;">
|
| 33 |
<a href="https://github.com/Jellyfish042/uncheatable_eval" target="_blank" style="text-decoration: none;">
|