Spaces:
Running
Running
Updated to have games as drop-down list
Browse files
app.py
CHANGED
@@ -26,83 +26,51 @@ if os.path.exists(RESULTS_TRACKER_FILE):
|
|
26 |
results_tracker = json.load(f)
|
27 |
else:
|
28 |
results_tracker = {
|
29 |
-
llm: {game: {"
|
|
|
30 |
for llm in llm_models
|
31 |
}
|
32 |
|
33 |
-
|
34 |
def save_results_tracker():
    """Save the results tracker to a JSON file.

    The tracker is serialized to a string *before* the target file is opened.
    Opening with mode ``"w"`` truncates the file immediately, so serializing
    first guarantees that a failure (e.g. a non-JSON-serializable value in
    ``results_tracker``) leaves the previously saved file untouched.

    Raises:
        TypeError: If ``results_tracker`` contains a value ``json`` cannot encode.
        OSError: If ``RESULTS_TRACKER_FILE`` cannot be opened or written.
    """
    serialized = json.dumps(results_tracker, indent=4)
    # json.dumps escapes non-ASCII by default, so the explicit encoding only
    # makes the on-disk format independent of the platform locale.
    with open(RESULTS_TRACKER_FILE, "w", encoding="utf-8") as f:
        f.write(serialized)
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
|
|
42 |
|
43 |
-
# Create a DataFrame where rows are LLMs and columns are games
|
44 |
-
leaderboard_df = pd.DataFrame(index=llm_models, columns=games_list)
|
45 |
-
|
46 |
for llm in llm_models:
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
# Ensure LLM names appear in the first column
|
57 |
leaderboard_df = leaderboard_df.reset_index()
|
58 |
leaderboard_df.rename(columns={"index": "LLM Model"}, inplace=True)
|
59 |
-
|
60 |
return leaderboard_df
|
61 |
|
62 |
-
|
63 |
-
def get_model_details(model_name):
    """Build a Markdown performance breakdown for the selected LLM model.

    Returns a fixed "no data" message when ``model_name`` is not a key of
    ``results_tracker``; otherwise a heading followed by one bullet line per
    game with its wins, ties, losses and total games.
    """
    if model_name not in results_tracker:
        return "No data available for this model."

    # Collect the pieces and join once instead of growing a string in place.
    parts = [f"### {model_name} Performance Breakdown\n"]
    for game, record in results_tracker[model_name].items():
        total_games = record["games"]
        bullet = (
            f"- **{game.capitalize()}**: {record['wins']} Wins, "
            f"{record['ties']} Ties, {record['losses']} Losses (Total: {total_games})\n"
        )
        parts.append(bullet)

    return "".join(parts)
|
77 |
-
|
78 |
-
|
79 |
# Gradio Interface
|
80 |
with gr.Blocks() as interface:
|
81 |
with gr.Tab("Game Arena"):
|
82 |
gr.Markdown("# LLM Game Arena\nPlay against LLMs or other players in classic games!")
|
83 |
|
84 |
-
# (Game selection and play functionality remains unchanged)
|
85 |
-
|
86 |
with gr.Tab("Leaderboard"):
|
87 |
gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
return calculate_leaderboard()
|
98 |
-
|
99 |
-
def update_details(model_name):
|
100 |
-
"""Updates the details section when an LLM is selected."""
|
101 |
-
return get_model_details(model_name)
|
102 |
-
|
103 |
-
update_leaderboard_button = gr.Button("Refresh Leaderboard")
|
104 |
-
update_leaderboard_button.click(fn=update_leaderboard, inputs=[], outputs=[leaderboard_table])
|
105 |
-
|
106 |
-
model_dropdown.change(fn=update_details, inputs=[model_dropdown], outputs=[details_output])
|
107 |
|
108 |
interface.launch()
|
|
|
26 |
results_tracker = json.load(f)
|
27 |
else:
|
28 |
results_tracker = {
|
29 |
+
llm: {game: {"games": 0, "moves/game": 0, "illegal-moves": 0,
|
30 |
+
"win-rate": 0, "vs Random": 0} for game in games_list}
|
31 |
for llm in llm_models
|
32 |
}
|
33 |
|
|
|
34 |
def save_results_tracker():
    """Save the results tracker to a JSON file.

    The tracker is serialized to a string *before* the target file is opened.
    Opening with mode ``"w"`` truncates the file immediately, so serializing
    first guarantees that a failure (e.g. a non-JSON-serializable value in
    ``results_tracker``) leaves the previously saved file untouched.

    Raises:
        TypeError: If ``results_tracker`` contains a value ``json`` cannot encode.
        OSError: If ``RESULTS_TRACKER_FILE`` cannot be opened or written.
    """
    serialized = json.dumps(results_tracker, indent=4)
    # json.dumps escapes non-ASCII by default, so the explicit encoding only
    # makes the on-disk format independent of the platform locale.
    with open(RESULTS_TRACKER_FILE, "w", encoding="utf-8") as f:
        f.write(serialized)
|
38 |
|
39 |
+
def calculate_leaderboard(selected_game: str) -> pd.DataFrame:
    """Generate a structured leaderboard table for the selected game.

    Args:
        selected_game: Key of the game whose per-model statistics are shown.

    Returns:
        A DataFrame with one row per entry in ``llm_models``. The first
        column, "LLM Model", holds the model name; the remaining columns are
        "# games", "moves/game", "illegal-moves", "win-rate" and "vs Random".
        The last two are rendered as percentage strings with one decimal
        place. Any statistic missing from ``results_tracker`` is shown as 0.
    """
    stat_columns = [
        "# games",
        "moves/game",
        "illegal-moves",
        "win-rate",
        "vs Random",
    ]
    leaderboard_df = pd.DataFrame(index=llm_models, columns=stat_columns)

    for llm in llm_models:
        # results_tracker can come from a previously saved JSON file, which is
        # not guaranteed to contain every model in llm_models; fall back to
        # empty stats instead of raising KeyError, matching the defaults used
        # for a missing game or a missing statistic below.
        game_stats = results_tracker.get(llm, {}).get(selected_game, {})
        leaderboard_df.loc[llm] = [
            game_stats.get("games", 0),
            game_stats.get("moves/game", 0),
            game_stats.get("illegal-moves", 0),
            f"{game_stats.get('win-rate', 0):.1f}%",
            f"{game_stats.get('vs Random', 0):.1f}%",
        ]

    # Move the model names out of the index into a regular first column.
    leaderboard_df = leaderboard_df.reset_index()
    leaderboard_df.rename(columns={"index": "LLM Model"}, inplace=True)
    return leaderboard_df
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
# Gradio UI: a placeholder "Game Arena" tab and a per-game "Leaderboard" tab.
with gr.Blocks() as interface:
    with gr.Tab("Game Arena"):
        gr.Markdown("# LLM Game Arena\nPlay against LLMs or other players in classic games!")

    with gr.Tab("Leaderboard"):
        gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")

        # The first game in games_list is both the initial dropdown value and
        # the game used to populate the table on first render.
        game_dropdown = gr.Dropdown(
            choices=games_list,
            label="Select Game",
            value=games_list[0],
        )
        leaderboard_table = gr.Dataframe(
            value=calculate_leaderboard(games_list[0]),
            label="Leaderboard",
        )

        def update_leaderboard(selected_game):
            """Recompute the leaderboard table for the newly selected game."""
            refreshed = calculate_leaderboard(selected_game)
            return refreshed

        # Re-render the table whenever a different game is chosen.
        game_dropdown.change(
            fn=update_leaderboard,
            inputs=[game_dropdown],
            outputs=[leaderboard_table],
        )

interface.launch()
|