Update utils.py
utils.py CHANGED
@@ -18,8 +18,8 @@ MODEL_INFO = [
               "Health", "History", "Law", "Math", "Philosophy", "Physics", "Psychology", "Other"]
 
 DATA_TITLE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number',
-
-
+                   'number', 'number', 'number', 'number', 'number', 'number', 'number',
+                   'number', 'number']
 
 SUBMISSION_NAME = "mmlu_pro_leaderboard_submission"
 SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
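For reference, the rebuilt list lines up with the leaderboard's columns: one 'markdown' column for the model name, then one 'number' column each for Overall and the subject scores. A quick sanity check (a sketch; the surrounding column definitions are partly elided in this diff, so the count of 14 subjects is an assumption based on MMLU-Pro's categories):

```python
# Reconstructed from the hunk above; the removed old lines are not shown in the diff.
DATA_TITLE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number',
                   'number', 'number', 'number', 'number', 'number', 'number', 'number',
                   'number', 'number']

# 'markdown' for Model, 'number' for Overall plus the 14 MMLU-Pro subjects (assumed).
assert len(DATA_TITLE_TYPE) == 1 + 1 + 14
```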
@@ -27,10 +27,11 @@ CSV_DIR = "./mmlu_pro_leaderboard_submission/results.csv"
 
 COLUMN_NAMES = MODEL_INFO
 
-
-
-
-
+LEADERBOARD_INTRODUCTION = """# MMLU-Pro Leaderboard
+
+MMLU-Pro is a more robust and challenging massive multi-task understanding dataset, tailored to more rigorously benchmark large language models' capabilities. It contains 12K complex questions across various disciplines. The following are the accuracies of various models evaluated on MMLU-Pro.
+
+We invite you to use our dataset, available at [https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro). If you want to reproduce our results or evaluate your own models on MMLU-Pro, please check out our evaluation scripts at [https://github.com/TIGER-AI-Lab/MMLU-Pro](https://github.com/TIGER-AI-Lab/MMLU-Pro).
 """
 
 TABLE_INTRODUCTION = """
@@ -50,23 +51,20 @@ SUBMIT_INTRODUCTION = """# Submit on Science Leaderboard Introduction
 
 ```json
 {
-    "Model": "[
-    "
-    "
-    "
-    "Business": 45.6,
+    "Model": "[MODEL_NAME]",
+    "Overall": 0.5678,
+    "Biology": 0.1234,
+    "Business": 0.4567,
     ...,
-    "Other:
+    "Other": 0.3456
 }
 ```
-After submitting, you can click the "Refresh" button to see the updated leaderboard(it may takes few seconds).
+After submitting, you can click the "Refresh" button to see the updated leaderboard (it may take a few seconds).
 
 """
 
 
 def get_df():
-    print("HF_TOKEN", HF_TOKEN)
-    print("SUBMISSION_URL", SUBMISSION_URL)
     repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
     repo.git_pull()
     df = pd.read_csv(CSV_DIR)
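To make the expected submission format concrete, here is a minimal sketch of producing a valid file. The full SUBJECTS tuple is only partly visible in this diff, so the 14 categories below are an assumption based on the visible entries and the MMLU-Pro dataset; the scores are placeholders.

```python
import json

# Assumed subject list; only "Health" through "Other" (plus Biology and Business
# from the example above) are visible in this diff.
SUBJECTS = ["Biology", "Business", "Chemistry", "Computer Science", "Economics",
            "Engineering", "Health", "History", "Law", "Math", "Philosophy",
            "Physics", "Psychology", "Other"]

submission = {"Model": "[MODEL_NAME]", "Overall": 0.5678}
submission.update({subject: 0.0 for subject in SUBJECTS})  # replace with real accuracies

# Note the new convention: accuracies are fractions in [0, 1], not percentages.
with open("submission.json", "w") as f:
    json.dump(submission, f, indent=4)
```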
@@ -81,7 +79,7 @@ def add_new_eval(
         return "Error! Empty file!"
 
     upload_data = json.loads(input_file)
-    data_row = [f'
+    data_row = [f'{upload_data["Model"]}', upload_data['Overall']]
     for subject in SUBJECTS:
         data_row += [upload_data[subject]]
 
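The rebuilt `data_row` is positional, so it only works if the CSV columns are ordered Model, Overall, then the subjects. A sketch of how the row would plausibly be appended (the actual persistence code falls outside this hunk and is assumed here):

```python
import pandas as pd

# Hypothetical continuation of add_new_eval; column order must match COLUMN_NAMES.
df = pd.read_csv(CSV_DIR)
df.loc[len(df)] = data_row           # append the new evaluation as the last row
df.to_csv(CSV_DIR, index=False)      # pushing the CSV back to the dataset repo is elided
```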
@@ -109,5 +107,3 @@ def add_new_eval(
 def refresh_data():
     return get_df()
 
-
-
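`refresh_data()` simply re-pulls the submission repo and re-reads the CSV via `get_df()`. A sketch of how it is typically wired to the "Refresh" button mentioned in SUBMIT_INTRODUCTION, assuming the Space is a Gradio app (the app code itself is not part of this diff):

```python
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown(LEADERBOARD_INTRODUCTION)
    table = gr.Dataframe(value=get_df(), headers=COLUMN_NAMES, datatype=DATA_TITLE_TYPE)
    refresh_button = gr.Button("Refresh")
    # Re-pull the submission repo and re-render the table on click.
    refresh_button.click(fn=refresh_data, outputs=table)

demo.launch()
```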