ubowang committed on
Commit
17ec649
·
verified ·
1 Parent(s): 653a1dc

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +14 -18
utils.py CHANGED
@@ -18,8 +18,8 @@ MODEL_INFO = [
18
  "Health", "History", "Law", "Math", "Philosophy", "Physics", "Psychology", "Other"]
19
 
20
  DATA_TITLE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number',
21
- 'number', 'number', 'number', 'number', 'number', 'number', 'number',
22
- 'number', 'number']
23
 
24
  SUBMISSION_NAME = "mmlu_pro_leaderboard_submission"
25
  SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
@@ -27,10 +27,11 @@ CSV_DIR = "./mmlu_pro_leaderboard_submission/results.csv"
27
 
28
  COLUMN_NAMES = MODEL_INFO
29
 
30
- LEADERBORAD_INTRODUCTION = """# MMLU-Pro Leaderboard
31
- MMLU-Pro dataset, a more robust and challenging massive multi-task understanding dataset tailored to more \
32
- rigorously benchmark large language models' capabilities. This dataset contains 12K \
33
- complex questions across various disciplines.
 
34
  """
35
 
36
  TABLE_INTRODUCTION = """
@@ -50,23 +51,20 @@ SUBMIT_INTRODUCTION = """# Submit on Science Leaderboard Introduction
50
 
51
  ```json
52
  {
53
- "Model": "[NAME]",
54
- "Repo": "https://huggingface.co/[MODEL_NAME],"
55
- "Overall": 56.7,
56
- "Biology": 23.4,
57
- "Business": 45.6,
58
  ...,
59
- "Other: 56.7"
60
  }
61
  ```
62
- After submitting, you can click the "Refresh" button to see the updated leaderboard(it may takes few seconds).
63
 
64
  """
65
 
66
 
67
  def get_df():
68
- print("HF_TOKEN", HF_TOKEN)
69
- print("SUBMISSION_URL", SUBMISSION_URL)
70
  repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
71
  repo.git_pull()
72
  df = pd.read_csv(CSV_DIR)
@@ -81,7 +79,7 @@ def add_new_eval(
81
  return "Error! Empty file!"
82
 
83
  upload_data = json.loads(input_file)
84
- data_row = [f'[{upload_data["Model"]}]({upload_data["Repo"]})', upload_data['Overall']]
85
  for subject in SUBJECTS:
86
  data_row += [upload_data[subject]]
87
 
@@ -109,5 +107,3 @@ def add_new_eval(
109
  def refresh_data():
110
  return get_df()
111
 
112
-
113
-
 
18
  "Health", "History", "Law", "Math", "Philosophy", "Physics", "Psychology", "Other"]
19
 
20
  DATA_TITLE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number',
21
+ 'number', 'number', 'number', 'number', 'number', 'number', 'number',
22
+ 'number', 'number']
23
 
24
  SUBMISSION_NAME = "mmlu_pro_leaderboard_submission"
25
  SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
 
27
 
28
  COLUMN_NAMES = MODEL_INFO
29
 
30
+ LEADERBOARD_INTRODUCTION = """# MMLU-Pro Leaderboard
31
+
32
+ MMLU-Pro is a more robust and challenging massive multi-task understanding dataset, tailored to more rigorously benchmark large language models' capabilities. This dataset contains 12K complex questions across various disciplines. The following are the accuracies of various models evaluated on MMLU-Pro.
33
+
34
+ We invite you to use our dataset available at [https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro). If you want to reproduce our results or evaluate your own models on MMLU-Pro, please check out our evaluation scripts at [https://github.com/TIGER-AI-Lab/MMLU-Pro](https://github.com/TIGER-AI-Lab/MMLU-Pro).
35
  """
36
 
37
  TABLE_INTRODUCTION = """
 
51
 
52
  ```json
53
  {
54
+ "Model": "[MODEL_NAME]",
55
+ "Overall": 0.5678,
56
+ "Biology": 0.1234,
57
+ "Business": 0.4567,
 
58
  ...,
59
+ "Other": 0.3456
60
  }
61
  ```
62
+ After submitting, you can click the "Refresh" button to see the updated leaderboard (it may take a few seconds).
63
 
64
  """
65
 
66
 
67
  def get_df():
 
 
68
  repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
69
  repo.git_pull()
70
  df = pd.read_csv(CSV_DIR)
 
79
  return "Error! Empty file!"
80
 
81
  upload_data = json.loads(input_file)
82
+ data_row = [f'{upload_data["Model"]}', upload_data['Overall']]
83
  for subject in SUBJECTS:
84
  data_row += [upload_data[subject]]
85
 
 
107
  def refresh_data():
108
  return get_df()
109