Spaces:
Running
Running
import pandas as pd | |
from pathlib import Path | |
from ..styles import highlight_color | |
abs_path = Path(__file__).parent.parent.parent | |
def replace_models_names(model_name):
    """Turn a raw model identifier into a Markdown link to Hugging Face.

    Identifiers containing ``"gpt"`` are returned untouched. For every other
    identifier a leading ``"model-"`` marker is dropped, the hyphen that
    separates the organisation from the repository name becomes ``"/"``, and
    the result is rendered as ``[org/name](https://huggingface.co/org/name)``.

    Args:
        model_name: Raw identifier string (presumably shaped like
            ``model-<org>-<name>`` -- confirm against the leaderboard JSON).

    Returns:
        The unchanged identifier for "gpt" models, otherwise a Markdown link
        string. An identifier without any hyphen is linked as-is.
    """
    if "gpt" in model_name:
        return model_name

    # Organisations whose own name contains a hyphen. For these, the
    # org/repo separator is the first hyphen *after* the organisation name.
    hyphenated_orgs = ('meta-llama', 'epfl-llm', '01-ai')

    # Strip the marker only when it is a prefix; a repository name that
    # merely contains "model-" must be left intact.
    prefix = 'model-'
    if model_name.startswith(prefix):
        repo_id = model_name[len(prefix):]
    else:
        repo_id = model_name

    # Skip past a known hyphenated organisation before looking for the
    # separator. Underscores are never touched, so names such as
    # "Mistral_7B" survive unchanged.
    search_from = next(
        (len(org) for org in hyphenated_orgs if repo_id.startswith(org)),
        0,
    )
    split_at = repo_id.find('-', search_from)
    if split_at != -1:
        repo_id = f"{repo_id[:split_at]}/{repo_id[split_at + 1:]}"

    return f"[{repo_id}](https://huggingface.co/{repo_id})"
def generate_ORDER_LIST_LDEK_and_data_types(json_data):
    """Derive the column order and per-column data types for a leaderboard.

    The two fixed columns ``"model_name"`` (type ``"markdown"``) and
    ``"overall_accuracy"`` (type ``"number"``) always come first. Every other
    key of ``json_data`` follows in sorted order, each typed ``"number"``.

    Args:
        json_data: Mapping whose keys are the column names of one record.

    Returns:
        A ``(column_order, data_types)`` tuple of two equally long lists.
    """
    leading_columns = ["model_name", "overall_accuracy"]
    leading_types = ["markdown", "number"]

    # Everything that is not a fixed column is a score column.
    score_columns = sorted(
        name for name in json_data.keys() if name not in leading_columns
    )

    column_order = leading_columns + score_columns
    column_types = leading_types + ["number"] * len(score_columns)
    return column_order, column_types
def filter_columns_ldek(column_choices):
    """Return the LDEK leaderboard restricted to the chosen columns.

    Columns come back in the order defined by the module-level
    ``ORDER_LIST_LDEK``, regardless of their order in ``column_choices``.

    Args:
        column_choices: Container of column names to keep.

    Returns:
        A DataFrame holding only the selected columns.
    """
    selected_columns = [col for col in ORDER_LIST_LDEK if col in column_choices]
    # The module rebinds LDEK_ACCS to a pandas Styler, which does not support
    # ``[]`` indexing; select from the DataFrame it wraps in that case.
    table = LDEK_ACCS if isinstance(LDEK_ACCS, pd.DataFrame) else LDEK_ACCS.data
    return table[selected_columns]
def load_json_data(file_path, ORDER_LIST_LDEK):
    """Load a leaderboard JSON file into a display-ready DataFrame.

    Steps, in order:
      1. Read ``file_path`` with ``pd.read_json``.
      2. Convert every column that contains at least one ``dict`` cell to
         strings.
      3. Rewrite ``model_name`` through ``replace_models_names``.
      4. Round all numeric columns to two decimals.
      5. Coerce ``overall_accuracy`` to numeric (unparseable values -> NaN).
      6. Keep only the columns listed in ``ORDER_LIST_LDEK``, in that order.
      7. Sort by ``overall_accuracy`` descending when that column was kept.

    Args:
        file_path: Location of the JSON file, passed straight to pandas.
        ORDER_LIST_LDEK: Desired column order; names missing from the file
            are ignored.

    Returns:
        The prepared DataFrame.
    """
    table = pd.read_json(file_path)

    # Nested dict cells are flattened to their string form.
    for name in table.columns:
        has_dict_cell = table[name].apply(type).eq(dict).any()
        if has_dict_cell:
            table[name] = table[name].apply(str)

    table["model_name"] = table["model_name"].apply(replace_models_names)

    numeric_names = table.select_dtypes(include='number').columns
    for name in numeric_names:
        table[name] = table[name].round(2)

    table["overall_accuracy"] = pd.to_numeric(
        table["overall_accuracy"], errors='coerce'
    )

    kept_columns = [name for name in ORDER_LIST_LDEK if name in table.columns]
    table = table[kept_columns]

    if "overall_accuracy" not in table.columns:
        return table
    return table.sort_values(by="overall_accuracy", ascending=False)
# --- LDEK leaderboard, built from leaderboards/ldek_accs.json ---
file_path = str(abs_path / "leaderboards/ldek_accs.json")

# The first record supplies the column names. Read it through the handle
# opened here (previously the handle was opened but never used).
with open(file_path, 'r', encoding='utf-8') as file:
    sample_data = pd.read_json(file).iloc[0].to_dict()

ORDER_LIST_LDEK, DATA_TYPES_LDEK = generate_ORDER_LIST_LDEK_and_data_types(sample_data)
LDEK_ACCS = load_json_data(file_path, ORDER_LIST_LDEK)

# Wrap the table in a Styler: highlight the maximum of every column except
# the first one and display values with two decimals.
LDEK_ACCS = LDEK_ACCS.style.highlight_max(
    color=highlight_color,
    subset=LDEK_ACCS.columns[1:],
).format(precision=2)

COLUMN_HEADERS_LDEK = ORDER_LIST_LDEK  # same list object, not a copy
print(ORDER_LIST_LDEK)
# --- LDEK_EN leaderboard, built from leaderboards/ldek_en_accs.json ---
file_path2 = str(abs_path / "leaderboards/ldek_en_accs.json")

# Sample the first record of *this* file. The column order and types were
# previously derived from `file_path` (the other leaderboard) by mistake.
with open(file_path2, 'r', encoding='utf-8') as file:
    sample_data2 = pd.read_json(file).iloc[0].to_dict()

ORDER_LIST_LDEK_EN, DATA_TYPES_LDEK_EN = generate_ORDER_LIST_LDEK_and_data_types(sample_data2)
LDEK_EN_ACCS = load_json_data(file_path2, ORDER_LIST_LDEK_EN)

# Wrap the table in a Styler: highlight the maximum of every column except
# the first one and display values with two decimals.
LDEK_EN_ACCS = LDEK_EN_ACCS.style.highlight_max(
    color=highlight_color,
    subset=LDEK_EN_ACCS.columns[1:],
).format(precision=2)

COLUMN_HEADERS_LDEK_EN = ORDER_LIST_LDEK_EN  # same list object, not a copy
print(ORDER_LIST_LDEK_EN)