lcipolina committed on
Commit
4c30414
·
verified ·
1 Parent(s): 27ff230

Updated to have games as a drop-down list

Browse files
Files changed (1) hide show
  1. app.py +25 -57
app.py CHANGED
@@ -26,83 +26,51 @@ if os.path.exists(RESULTS_TRACKER_FILE):
26
  results_tracker = json.load(f)
27
  else:
28
  results_tracker = {
29
- llm: {game: {"wins": 0, "ties": 0, "losses": 0, "games": 0} for game in games_list}
 
30
  for llm in llm_models
31
  }
32
 
33
-
34
  def save_results_tracker():
35
  """Save the results tracker to a JSON file."""
36
  with open(RESULTS_TRACKER_FILE, "w") as f:
37
  json.dump(results_tracker, f, indent=4)
38
 
39
-
40
- def calculate_leaderboard():
41
- """Generate a structured leaderboard table summarizing LLM performance across games."""
 
 
42
 
43
- # Create a DataFrame where rows are LLMs and columns are games
44
- leaderboard_df = pd.DataFrame(index=llm_models, columns=games_list)
45
-
46
  for llm in llm_models:
47
- for game in games_list:
48
- games_played = max(1, results_tracker[llm][game]['games']) # Avoid division by zero
49
- wins = (results_tracker[llm][game]['wins'] / games_played) * 100
50
- ties = (results_tracker[llm][game]['ties'] / games_played) * 100
51
- losses = (results_tracker[llm][game]['losses'] / games_played) * 100
52
-
53
- # Format as percentage string
54
- leaderboard_df.loc[llm, game] = f"{wins:.1f}% W / {ties:.1f}% T / {losses:.1f}% L"
55
-
56
- # Ensure LLM names appear in the first column
57
  leaderboard_df = leaderboard_df.reset_index()
58
  leaderboard_df.rename(columns={"index": "LLM Model"}, inplace=True)
59
-
60
  return leaderboard_df
61
 
62
-
63
- def get_model_details(model_name):
64
- """Returns detailed performance breakdown of the selected LLM model."""
65
- if model_name not in results_tracker:
66
- return "No data available for this model."
67
-
68
- details = f"### {model_name} Performance Breakdown\n"
69
- for game, record in results_tracker[model_name].items():
70
- total_games = record["games"]
71
- details += (
72
- f"- **{game.capitalize()}**: {record['wins']} Wins, "
73
- f"{record['ties']} Ties, {record['losses']} Losses (Total: {total_games})\n"
74
- )
75
-
76
- return details
77
-
78
-
79
  # Gradio Interface
80
  with gr.Blocks() as interface:
81
  with gr.Tab("Game Arena"):
82
  gr.Markdown("# LLM Game Arena\nPlay against LLMs or other players in classic games!")
83
 
84
- # (Game selection and play functionality remains unchanged)
85
-
86
  with gr.Tab("Leaderboard"):
87
  gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
88
-
89
- leaderboard_table = gr.Dataframe(value=calculate_leaderboard(), label="Leaderboard")
90
-
91
- with gr.Row():
92
- model_dropdown = gr.Dropdown(choices=llm_models, label="Select LLM Model")
93
- details_output = gr.Textbox(label="Model Performance Details", interactive=False)
94
-
95
- def update_leaderboard():
96
- """Updates the leaderboard table."""
97
- return calculate_leaderboard()
98
-
99
- def update_details(model_name):
100
- """Updates the details section when an LLM is selected."""
101
- return get_model_details(model_name)
102
-
103
- update_leaderboard_button = gr.Button("Refresh Leaderboard")
104
- update_leaderboard_button.click(fn=update_leaderboard, inputs=[], outputs=[leaderboard_table])
105
-
106
- model_dropdown.change(fn=update_details, inputs=[model_dropdown], outputs=[details_output])
107
 
108
  interface.launch()
 
26
  results_tracker = json.load(f)
27
  else:
28
  results_tracker = {
29
+ llm: {game: {"games": 0, "moves/game": 0, "illegal-moves": 0,
30
+ "win-rate": 0, "vs Random": 0} for game in games_list}
31
  for llm in llm_models
32
  }
33
 
 
34
  def save_results_tracker():
35
  """Save the results tracker to a JSON file."""
36
  with open(RESULTS_TRACKER_FILE, "w") as f:
37
  json.dump(results_tracker, f, indent=4)
38
 
39
def calculate_leaderboard(selected_game: str) -> pd.DataFrame:
    """Generate a structured leaderboard table for the selected game.

    Args:
        selected_game: Key of the game whose per-model statistics are shown.

    Returns:
        A DataFrame with one row per entry in ``llm_models``. The first
        column, "LLM Model", holds the model name; the remaining columns are
        "# games", "moves/game", "illegal-moves", "win-rate" and "vs Random".
        The last two are rendered as strings with one decimal and a "%" suffix.
        Any model, game or statistic missing from ``results_tracker`` is
        reported as 0.
    """
    stat_columns = ["# games", "moves/game", "illegal-moves", "win-rate", "vs Random"]
    leaderboard_df = pd.DataFrame(index=llm_models, columns=stat_columns)

    for llm in llm_models:
        # results_tracker can come from a JSON file on disk, which is not
        # guaranteed to contain every model in llm_models (or every game), so
        # both lookups fall back to an empty record instead of raising KeyError.
        game_stats = results_tracker.get(llm, {}).get(selected_game, {})
        leaderboard_df.loc[llm] = [
            game_stats.get("games", 0),
            game_stats.get("moves/game", 0),
            game_stats.get("illegal-moves", 0),
            f"{game_stats.get('win-rate', 0):.1f}%",
            f"{game_stats.get('vs Random', 0):.1f}%",
        ]

    # Move the model names out of the index into a regular, labelled column.
    leaderboard_df = leaderboard_df.reset_index()
    leaderboard_df.rename(columns={"index": "LLM Model"}, inplace=True)
    return leaderboard_df
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  # Gradio Interface
60
  with gr.Blocks() as interface:
61
  with gr.Tab("Game Arena"):
62
  gr.Markdown("# LLM Game Arena\nPlay against LLMs or other players in classic games!")
63
 
 
 
64
  with gr.Tab("Leaderboard"):
65
  gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
66
+
67
+ game_dropdown = gr.Dropdown(choices=games_list, label="Select Game", value=games_list[0])
68
+ leaderboard_table = gr.Dataframe(value=calculate_leaderboard(games_list[0]), label="Leaderboard")
69
+
70
+ def update_leaderboard(selected_game):
71
+ """Updates the leaderboard table based on the selected game."""
72
+ return calculate_leaderboard(selected_game)
73
+
74
+ game_dropdown.change(fn=update_leaderboard, inputs=[game_dropdown], outputs=[leaderboard_table])
 
 
 
 
 
 
 
 
 
 
75
 
76
  interface.launch()