# Spaces status: Running
import os | |
import json | |
import pandas as pd | |
import gradio as gr | |
from agents.llm_registry import LLM_REGISTRY # Dynamically fetch LLM models | |
# Names of all registered LLMs (the keys of LLM_REGISTRY); one leaderboard
# row is produced per entry.
llm_models = list(LLM_REGISTRY.keys())

# Games offered in the leaderboard dropdown (maintained by hand for now).
games_list = [
    "rock_paper_scissors",
    "prisoners_dilemma",
    "tic_tac_toe",
    "connect_four",
    "matching_pennies",
    "kuhn_poker",
]

# JSON file used to persist results between runs.
RESULTS_TRACKER_FILE = "results_tracker.json"


def _load_results_tracker():
    """Load the persisted results tracker and backfill missing entries.

    Returns:
        A nested dict ``{llm: {game: {stat: value}}}``. Values found in
        ``RESULTS_TRACKER_FILE`` are kept as-is; every model in
        ``llm_models`` and every game in ``games_list`` that the file does
        not cover (or every stat a record lacks) is filled in with zero.
        When the file does not exist the result is entirely zero-filled.
    """
    zero_stats = {
        "games": 0,
        "moves/game": 0,
        "illegal-moves": 0,
        "win-rate": 0,
        "vs Random": 0,
    }

    tracker = {}
    if os.path.exists(RESULTS_TRACKER_FILE):
        with open(RESULTS_TRACKER_FILE, "r", encoding="utf-8") as f:
            tracker = json.load(f)

    # A results file written before a model or game was added would
    # otherwise lack those keys and break direct lookups later on.
    for llm in llm_models:
        per_game = tracker.setdefault(llm, {})
        for game in games_list:
            stats = per_game.setdefault(game, {})
            for stat_name, zero in zero_stats.items():
                stats.setdefault(stat_name, zero)
    return tracker


# Load or initialize the results tracker.
results_tracker = _load_results_tracker()
def save_results_tracker():
    """Save the results tracker to ``RESULTS_TRACKER_FILE`` as indented JSON.

    The JSON is first written to a temporary sibling file and then moved
    over the real file with ``os.replace``. If serialization fails or the
    process is interrupted mid-write, the previously saved results file is
    left untouched instead of being truncated.
    """
    tmp_path = f"{RESULTS_TRACKER_FILE}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(results_tracker, f, indent=4)
    # Swap the fully written file into place in a single step.
    os.replace(tmp_path, RESULTS_TRACKER_FILE)
def calculate_leaderboard(selected_game: str) -> pd.DataFrame:
    """Generate a structured leaderboard table for the selected game.

    Args:
        selected_game: Key of the game whose statistics should be listed.

    Returns:
        A DataFrame with one row per entry in ``llm_models`` and the columns
        "LLM Model", "# games", "moves/game", "illegal-moves", "win-rate"
        and "vs Random". The last two are rendered as percentage strings
        with one decimal place. Any model, game or statistic missing from
        ``results_tracker`` is reported as zero.
    """
    columns = [
        "LLM Model",
        "# games",
        "moves/game",
        "illegal-moves",
        "win-rate",
        "vs Random",
    ]

    rows = []
    for llm in llm_models:
        # Use .get() at both levels: a tracker loaded from an older results
        # file may not contain every model or every game.
        game_stats = results_tracker.get(llm, {}).get(selected_game, {})
        rows.append({
            "LLM Model": llm,
            "# games": game_stats.get("games", 0),
            "moves/game": game_stats.get("moves/game", 0),
            "illegal-moves": game_stats.get("illegal-moves", 0),
            "win-rate": f"{game_stats.get('win-rate', 0):.1f}%",
            "vs Random": f"{game_stats.get('vs Random', 0):.1f}%",
        })

    # dtype=object keeps the cells as plain Python values, matching a frame
    # that is created empty and then filled row by row.
    return pd.DataFrame(rows, columns=columns, dtype=object)
# Gradio interface: a "Game Arena" tab and a per-game "Leaderboard" tab.
# NOTE(review): the tab nesting below was reconstructed from a source whose
# indentation had been flattened -- confirm it matches the intended layout.
with gr.Blocks() as interface:

    with gr.Tab("Game Arena"):
        gr.Markdown("# LLM Game Arena\nPlay against LLMs or other players in classic games!")

    with gr.Tab("Leaderboard"):
        gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")

        # Both widgets start out on the first game in the list.
        game_dropdown = gr.Dropdown(
            choices=games_list,
            label="Select Game",
            value=games_list[0],
        )
        leaderboard_table = gr.Dataframe(
            value=calculate_leaderboard(games_list[0]),
            label="Leaderboard",
        )

        def update_leaderboard(selected_game):
            """Return the leaderboard table for the newly selected game."""
            return calculate_leaderboard(selected_game)

        # Recompute the table whenever the dropdown selection changes.
        game_dropdown.change(
            fn=update_leaderboard,
            inputs=[game_dropdown],
            outputs=[leaderboard_table],
        )

interface.launch()