Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,15 @@ import logging
|
|
| 6 |
import arena_config
|
| 7 |
import plotly.graph_objects as go
|
| 8 |
from typing import Dict
|
| 9 |
-
from leaderboard import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
# Initialize logging for errors only
|
|
@@ -122,6 +130,7 @@ def record_vote(prompt, left_response, right_response, left_model, right_model,
|
|
| 122 |
return (
|
| 123 |
gr.update(value=result_message, visible=True), # Show result as Markdown
|
| 124 |
get_leaderboard(), # Update leaderboard
|
|
|
|
| 125 |
gr.update(interactive=False), # Disable left vote button
|
| 126 |
gr.update(interactive=False), # Disable right vote button
|
| 127 |
gr.update(interactive=False), # Disable tie button
|
|
@@ -200,6 +209,8 @@ def get_leaderboard_chart():
|
|
| 200 |
)
|
| 201 |
)
|
| 202 |
|
|
|
|
|
|
|
| 203 |
return fig
|
| 204 |
|
| 205 |
def new_battle():
|
|
@@ -315,6 +326,10 @@ with gr.Blocks(css="""
|
|
| 315 |
with gr.Tab("Performance Chart"):
|
| 316 |
leaderboard_chart = gr.Plot(label="Model Performance Chart")
|
| 317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
# Define interactions
|
| 319 |
submit_btn.click(
|
| 320 |
battle_arena,
|
|
@@ -327,14 +342,14 @@ with gr.Blocks(css="""
|
|
| 327 |
left_vote_btn.click(
|
| 328 |
lambda *args: record_vote(*args, "Left is better"),
|
| 329 |
inputs=[prompt_input, left_output, right_output, left_model, right_model],
|
| 330 |
-
outputs=[result, leaderboard, left_vote_btn,
|
| 331 |
right_vote_btn, tie_btn, model_names_row, leaderboard_chart]
|
| 332 |
)
|
| 333 |
|
| 334 |
right_vote_btn.click(
|
| 335 |
lambda *args: record_vote(*args, "Right is better"),
|
| 336 |
inputs=[prompt_input, left_output, right_output, left_model, right_model],
|
| 337 |
-
outputs=[result, leaderboard, left_vote_btn,
|
| 338 |
right_vote_btn, tie_btn, model_names_row, leaderboard_chart]
|
| 339 |
)
|
| 340 |
|
|
@@ -353,7 +368,10 @@ with gr.Blocks(css="""
|
|
| 353 |
|
| 354 |
# Update leaderboard and chart on launch
|
| 355 |
demo.load(get_leaderboard, outputs=leaderboard)
|
|
|
|
| 356 |
demo.load(get_leaderboard_chart, outputs=leaderboard_chart)
|
| 357 |
|
| 358 |
if __name__ == "__main__":
|
| 359 |
-
|
|
|
|
|
|
|
|
|
| 6 |
import arena_config
|
| 7 |
import plotly.graph_objects as go
|
| 8 |
from typing import Dict
|
| 9 |
+
from leaderboard import (
|
| 10 |
+
get_current_leaderboard,
|
| 11 |
+
update_leaderboard,
|
| 12 |
+
start_backup_thread,
|
| 13 |
+
get_leaderboard,
|
| 14 |
+
get_elo_leaderboard,
|
| 15 |
+
ensure_elo_ratings_initialized
|
| 16 |
+
)
|
| 17 |
+
import sys
|
| 18 |
|
| 19 |
|
| 20 |
# Initialize logging for errors only
|
|
|
|
| 130 |
return (
|
| 131 |
gr.update(value=result_message, visible=True), # Show result as Markdown
|
| 132 |
get_leaderboard(), # Update leaderboard
|
| 133 |
+
get_elo_leaderboard(), # Update ELO leaderboard
|
| 134 |
gr.update(interactive=False), # Disable left vote button
|
| 135 |
gr.update(interactive=False), # Disable right vote button
|
| 136 |
gr.update(interactive=False), # Disable tie button
|
|
|
|
| 209 |
)
|
| 210 |
)
|
| 211 |
|
| 212 |
+
chart_data = fig.to_json()
|
| 213 |
+
print(f"Chart size: {sys.getsizeof(chart_data)} bytes")
|
| 214 |
return fig
|
| 215 |
|
| 216 |
def new_battle():
|
|
|
|
| 326 |
with gr.Tab("Performance Chart"):
|
| 327 |
leaderboard_chart = gr.Plot(label="Model Performance Chart")
|
| 328 |
|
| 329 |
+
# ELO Leaderboard Tab
|
| 330 |
+
with gr.Tab("ELO Leaderboard"):
|
| 331 |
+
elo_leaderboard = gr.HTML(label="ELO Leaderboard")
|
| 332 |
+
|
| 333 |
# Define interactions
|
| 334 |
submit_btn.click(
|
| 335 |
battle_arena,
|
|
|
|
| 342 |
left_vote_btn.click(
|
| 343 |
lambda *args: record_vote(*args, "Left is better"),
|
| 344 |
inputs=[prompt_input, left_output, right_output, left_model, right_model],
|
| 345 |
+
outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
|
| 346 |
right_vote_btn, tie_btn, model_names_row, leaderboard_chart]
|
| 347 |
)
|
| 348 |
|
| 349 |
right_vote_btn.click(
|
| 350 |
lambda *args: record_vote(*args, "Right is better"),
|
| 351 |
inputs=[prompt_input, left_output, right_output, left_model, right_model],
|
| 352 |
+
outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
|
| 353 |
right_vote_btn, tie_btn, model_names_row, leaderboard_chart]
|
| 354 |
)
|
| 355 |
|
|
|
|
| 368 |
|
| 369 |
# Update leaderboard and chart on launch
|
| 370 |
demo.load(get_leaderboard, outputs=leaderboard)
|
| 371 |
+
demo.load(get_elo_leaderboard, outputs=elo_leaderboard)
|
| 372 |
demo.load(get_leaderboard_chart, outputs=leaderboard_chart)
|
| 373 |
|
| 374 |
if __name__ == "__main__":
|
| 375 |
+
# Initialize ELO ratings before launching the app
|
| 376 |
+
ensure_elo_ratings_initialized()
|
| 377 |
+
demo.launch(show_api=False)
|