# MMLU-Pro / utils.py
import pandas as pd
import gradio as gr
import csv
import json
import os
import shutil
from huggingface_hub import Repository
HF_TOKEN = os.environ.get("HF_TOKEN")
SUBJECTS = ["Biology", "Business", "Chemistry", "Computer Science", "Economics", "Engineering",
            "Health", "History", "Law", "Math", "Philosophy", "Physics", "Psychology", "Other"]
MODEL_INFO = [
    "Models",
    "Overall",
    "Biology", "Business", "Chemistry", "Computer Science", "Economics", "Engineering",
    "Health", "History", "Law", "Math", "Philosophy", "Physics", "Psychology", "Other"]
DATA_TITLE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number',
                   'number', 'number', 'number', 'number', 'number', 'number', 'number',
                   'number', 'number']
SUBMISSION_NAME = "mmlu_pro_leaderboard_submission"
SUBMISSION_URL = "https://huggingface.co/datasets/TIGER-Lab/" + SUBMISSION_NAME
CSV_DIR = os.path.join(SUBMISSION_NAME, "results.csv")
COLUMN_NAMES = MODEL_INFO
LEADERBOARD_INTRODUCTION = """# MMLU-Pro Leaderboard
MMLU-Pro is a more robust and challenging massive multi-task understanding dataset, designed to benchmark large language models' capabilities more rigorously. It contains 12K complex questions across various disciplines. The following table reports the accuracy of each model evaluated on MMLU-Pro.
We invite you to use our dataset available at [https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro). If you want to reproduce our results or evaluate your own models on MMLU-Pro, please check out our evaluation scripts at [https://github.com/TIGER-AI-Lab/MMLU-Pro](https://github.com/TIGER-AI-Lab/MMLU-Pro).
"""
TABLE_INTRODUCTION = """
"""
LEADERBOARD_INFO = """
We list the information of the datasets used below:<br>
"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r""""""
SUBMIT_INTRODUCTION = """# Submit to the MMLU-Pro Leaderboard
## ⚠ Please note that you need to submit a JSON file in the following format:
```json
{
    "Model": "[MODEL_NAME]",
    "Overall": 0.5678,
    "Biology": 0.1234,
    "Business": 0.4567,
    ...,
    "Other": 0.3456
}
```
After submitting, you can click the "Refresh" button to see the updated leaderboard (it may take a few seconds).
"""
def get_df():
    """Pull the latest submissions and return the sorted leaderboard table."""
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
                      use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    df = pd.read_csv(CSV_DIR)
    df = df.sort_values(by=['Overall'], ascending=False)
    return df[COLUMN_NAMES]
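# Sketch under assumptions: this file does not build the UI itself, but the
# constants above are shaped for a gradio Dataframe, so the app presumably
# wires them up roughly like this (the helper name is hypothetical).
def _leaderboard_table():
    return gr.components.Dataframe(
        value=get_df(),
        headers=COLUMN_NAMES,
        datatype=DATA_TITLE_TYPE,
        interactive=False,
    )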
def add_new_eval(
    input_file,
):
    if input_file is None:
        return "Error! Empty file!"
    # input_file is expected to hold the raw JSON content of the submission.
    upload_data = json.loads(input_file)
    data_row = [upload_data['Model'], upload_data['Overall']]
    for subject in SUBJECTS:
        data_row.append(upload_data[subject])
    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
                                 use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()
    # Collect the model names that are already on the leaderboard (first CSV column).
    already_submitted = []
    with open(CSV_DIR, mode='r') as file:
        reader = csv.reader(file, delimiter=',')
        for row in reader:
            already_submitted.append(row[0])
    if data_row[0] not in already_submitted:
        with open(CSV_DIR, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(data_row)
        submission_repo.push_to_hub()
        return 'Submission Successful'
    else:
        return 'The entry already exists'
def refresh_data():
    return get_df()
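# Minimal local smoke test (an assumption, not from the original file): requires
# HF_TOKEN to be set and the submission dataset to be reachable.
if __name__ == "__main__":
    print(refresh_data().head())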