import pandas as pd
import gradio as gr
import csv
import json
import os
import shutil
from huggingface_hub import Repository
HF_TOKEN = os.environ.get("HF_TOKEN")
SUBJECTS = ["Biology", "Business", "Chemistry", "Computer Science", "Economics", "Engineering",
"Health", "History", "Law", "Math", "Philosophy", "Physics", "Psychology", "Other"]
MODEL_INFO = [
"Models",
"Overall",
"Biology", "Business", "Chemistry", "Computer Science", "Economics", "Engineering",
"Health", "History", "Law", "Math", "Philosophy", "Physics", "Psychology", "Other"]
DATA_TITLE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number',
'number', 'number', 'number', 'number', 'number', 'number', 'number',
'number', 'number']
SUBMISSION_NAME = "mmlu_pro_leaderboard_submission"
SUBMISSION_URL = "https://huggingface.co/datasets/TIGER-Lab/" + SUBMISSION_NAME
# Path to the results CSV file inside the locally cloned submissions repo.
CSV_DIR = "./mmlu_pro_leaderboard_submission/results.csv"
COLUMN_NAMES = MODEL_INFO
LEADERBOARD_INTRODUCTION = """# MMLU-Pro Leaderboard
MMLU-Pro is a more robust and challenging massive multi-task understanding dataset, tailored to benchmark the capabilities of large language models more rigorously. It contains 12K complex questions across various disciplines. The following are the accuracies of various models evaluated on MMLU-Pro.
We invite you to use our dataset available at [https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro). If you want to reproduce our results or evaluate your own models on MMLU-Pro, please check out our evaluation scripts at [https://github.com/TIGER-AI-Lab/MMLU-Pro](https://github.com/TIGER-AI-Lab/MMLU-Pro).
"""
TABLE_INTRODUCTION = """
"""
LEADERBOARD_INFO = """
We list information about the datasets used as follows:<br>
"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r""""""
SUBMIT_INTRODUCTION = """# Submit to the MMLU-Pro Leaderboard
## ⚠ Please note that you need to submit a JSON file with the following format:
```json
{
    "Model": "[MODEL_NAME]",
    "Overall": 0.5678,
    "Biology": 0.1234,
    "Business": 0.4567,
    ...,
    "Other": 0.3456
}
```
After submitting, you can click the "Refresh" button to see the updated leaderboard (it may take a few seconds).
"""
def get_df():
    # Clone (or update) the submissions dataset repo and load the results CSV.
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
                      use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    df = pd.read_csv(CSV_DIR)
    # Rank models by their overall accuracy, best first.
    df = df.sort_values(by=['Overall'], ascending=False)
    return df[COLUMN_NAMES]
def add_new_eval(input_file):
    if input_file is None:
        return "Error! Empty file!"
    # The uploaded file arrives as raw JSON (bytes or string), which json.loads accepts.
    upload_data = json.loads(input_file)
    # Build the CSV row in column order: model name, overall score, then per-subject scores.
    data_row = [f'{upload_data["Model"]}', upload_data['Overall']]
    for subject in SUBJECTS:
        data_row += [upload_data[subject]]
    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
                                 use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()
    # Deduplicate by model name: collect the first column of every existing row.
    already_submitted = []
    with open(CSV_DIR, mode='r') as file:
        reader = csv.reader(file, delimiter=',')
        for row in reader:
            already_submitted.append(row[0])
    if data_row[0] not in already_submitted:
        with open(CSV_DIR, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(data_row)
        submission_repo.push_to_hub()
        return 'Submission successful!'
    else:
        return 'Error! This model entry already exists.'

def refresh_data():
    return get_df()
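# A minimal wiring sketch (assumption: the Space's actual layout may differ),
# showing how the pieces above could form a Gradio Blocks UI. Kept commented
# out so importing this module has no side effects.
#
#   with gr.Blocks() as demo:
#       gr.Markdown(LEADERBOARD_INTRODUCTION)
#       table = gr.Dataframe(value=get_df(), headers=COLUMN_NAMES,
#                            datatype=DATA_TITLE_TYPE, interactive=False)
#       refresh_button = gr.Button("Refresh")
#       refresh_button.click(refresh_data, outputs=table)
#       gr.Markdown(SUBMIT_INTRODUCTION)
#       upload = gr.File(label="Submission file (.json)", type="binary")
#       submit_button = gr.Button("Submit")
#       status = gr.Textbox(label="Submission status")
#       submit_button.click(add_new_eval, inputs=upload, outputs=status)
#   demo.launch()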