import pandas as pd
import gradio as gr
import csv
import json
import os
import shutil
from huggingface_hub import Repository

HF_TOKEN = os.environ.get("HF_TOKEN")

SUBJECTS = ["Biology", "Business", "Chemistry", "Computer Science", "Economics", "Engineering",
            "Health", "History", "Law", "Math", "Philosophy", "Physics", "Psychology", "Other"]

# Leaderboard columns: model link, overall score, then one column per subject.
MODEL_INFO = [
    "Models", "Overall",
    "Biology", "Business", "Chemistry", "Computer Science", "Economics", "Engineering",
    "Health", "History", "Law", "Math", "Philosophy", "Physics", "Psychology", "Other"]

# Gradio Dataframe datatypes, one entry per column in MODEL_INFO.
DATA_TILE_TYPE = ['markdown'] + ['number'] * 15

SUBMISSION_NAME = "mmlu_pro_leaderboard_submission"
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
CSV_DIR = "./mmlu_pro_leaderboard_submission/results.csv"

COLUMN_NAMES = MODEL_INFO

LEADERBORAD_INTRODUCTION = """# MMLU-Pro Leaderboard

MMLU-Pro is a more robust and challenging massive multi-task understanding dataset, tailored to \
more rigorously benchmark the capabilities of large language models. It contains 12K complex \
questions across various disciplines.
"""

TABLE_INTRODUCTION = """
"""

LEADERBORAD_INFO = """
Information about the datasets used is listed below:
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r""""""

SUBMIT_INTRODUCTION = """# Submit to the MMLU-Pro Leaderboard

## ⚠ Please note that you need to submit a JSON file in the following format:

```json
{
    "Model": "[NAME]",
    "Repo": "https://huggingface.co/[MODEL_NAME]",
    "Overall": 56.7,
    "Biology": 23.4,
    "Business": 45.6,
    ...,
    "Other": 56.7
}
```

After submitting, you can click the "Refresh" button to see the updated leaderboard (it may take a few seconds).
"""


def get_df():
    """Pull the latest results from the Hub and return the leaderboard sorted by overall score."""
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
                      use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    df = pd.read_csv(CSV_DIR)
    df = df.sort_values(by=['Overall'], ascending=False)
    return df[COLUMN_NAMES]


def add_new_eval(input_file):
    """Parse an uploaded JSON submission and append it to the leaderboard CSV on the Hub."""
    if input_file is None:
        return "Error! Empty file!"

    upload_data = json.loads(input_file)
    # Build one CSV row: a markdown link to the model repo, the overall score, then per-subject scores.
    data_row = [f'[{upload_data["Model"]}]({upload_data["Repo"]})', upload_data['Overall']]
    for subject in SUBJECTS:
        data_row += [upload_data[subject]]

    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
                                 use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()

    # Only append entries whose model link is not already on the leaderboard.
    already_submitted = []
    with open(CSV_DIR, mode='r') as file:
        reader = csv.reader(file, delimiter=',')
        for row in reader:
            already_submitted.append(row[0])

    if data_row[0] not in already_submitted:
        with open(CSV_DIR, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(data_row)
        submission_repo.push_to_hub()
        print('Submission Successful')
    else:
        print('The entry already exists')


def refresh_data():
    return get_df()