"""BlinkCode leaderboard Gradio app.

Reads evaluation results from ``leader_board.json`` (shipped next to this
file) and renders them with ``gradio_leaderboard.Leaderboard``, plus a
citation accordion.
"""

import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
import config
from pathlib import Path
import pandas as pd
from datetime import datetime

# The results file lives alongside this script.
abs_path = Path(__file__).parent
df = pd.read_json(str(abs_path / "leader_board.json"))

# {MODEL_COUNT} and {LAST_UPDATED} are explicit placeholder tokens filled in
# below.  NOTE(review): the previous version called str.replace("", value),
# which inserts `value` between every single character of the string --
# explicit tokens fix that.
head_content = """
# 🏅 BlinkCode Leaderboard

### Welcome to the BlinkCode Leaderboard!
On this leaderboard we share the evaluation results of MLLMs obtained by the [OpenSource Framework](https://github.com/YJQuantumLeap/BlinkCode).

### Currently, BlinkCode Leaderboard covers {MODEL_COUNT} different VLMs (including GPT-4v, Gemini, QwenVLMAX, LLaVA, etc.) and 9 different tasks.

## Main Evaluation Results

- Metrics:
    - Avg Score: The average score on all tasks (normalized to 0 - 100, the higher the better).
    - The scores in the 5 tasks (HumanEval-V, MBPP-V, GSM8K-V, MATH-V, VP) represent the percentage of accuracy.
    - The scores in the image reconstruction tasks (Matplotlib, SVG, TikZ, Webpage) represent the similarity between the reconstructed images and the original images (normalized to 0 - 100, the higher the better).
- By default, we present the unrefined evaluation results, sorted by the descending order of Avg Score⬆️.
- The ⭐ symbol indicates results that have undergone two rounds of refinement.

This leaderboard was last updated: {LAST_UPDATED}.
"""

CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
    author={OpenCompass Contributors},
    howpublished = {\url{https://github.com/open-compass/opencompass}},
    year={2023}
}"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# Fill the header placeholders with the live model count and a timestamp.
unique_models_count = df["Model"].nunique()
formatted_time = datetime.now().strftime("%y.%m.%d %H:%M:%S")
head_content = (
    head_content
    .replace("{MODEL_COUNT}", str(unique_models_count))
    .replace("{LAST_UPDATED}", formatted_time)
)

with gr.Blocks() as demo:
    gr.Markdown(head_content)
    with gr.Tabs():
        Leaderboard(
            value=df,
            select_columns=SelectColumns(
                default_selection=config.ON_LOAD_COLUMNS,
                cant_deselect=["Rank", "Model"],
                label="Select Columns to Display:",
            ),
            search_columns=["Model", "Model Type"],
            hide_columns=["Model Size", "Model Type", "Supports multiple images"],
            filter_columns=[
                "Model Size",
                "Model Type",
                "Supports multiple images",
                # ColumnFilter("Params (B)", default=[0, 20]),
            ],
            datatype=config.TYPES,
            column_widths=["5%", "15%"],
        )
    with gr.Row():
        with gr.Accordion("Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
            )

if __name__ == "__main__":
    demo.launch()