Spaces:

amu-cai
/

Polish_Medical_Exams

Running

Polish_Medical_Exams / src /structures /ldek_structure.py

michal

Update

2051b0b 2 months ago

3.15 kB

	import pandas as pd
	from pathlib import Path
	from ..styles import highlight_color

	abs_path = Path(__file__).parent.parent.parent

	def replace_models_names(model_name):
	    """Convert a stored model identifier into a Markdown link to Hugging Face.

	    Identifiers containing ``"gpt"`` are returned untouched. For every other
	    identifier, each ``"model-"`` substring is removed and the first hyphen
	    that separates the organisation from the repository is turned into ``/``.
	    Organisations whose own name contains a hyphen (``meta-llama``,
	    ``epfl-llm``, ``01-ai``) are recognised by prefix so that their internal
	    hyphen is kept.

	    Args:
	        model_name: Identifier string, e.g. ``"model-meta-llama-Llama-2-7b"``.

	    Returns:
	        ``model_name`` unchanged when it contains ``"gpt"``; otherwise the
	        string ``"[org/repo](https://huggingface.co/org/repo)"``.
	    """
	    if "gpt" in model_name:
	        return model_name

	    # Organisations whose name itself contains a hyphen.
	    hyphenated_orgs = ('meta-llama', 'epfl-llm', '01-ai')

	    new_name = model_name.replace('model-', '')

	    for org in hyphenated_orgs:
	        if new_name.startswith(org):
	            # Keep the organisation verbatim and split only on the first
	            # hyphen that follows it. No placeholder round-trip is used, so
	            # underscores elsewhere in the name are never rewritten.
	            remainder = new_name[len(org):]
	            new_name = org + remainder.replace('-', '/', 1)
	            break
	    else:
	        # Plain organisation name: the first hyphen is the org/repo separator.
	        # Underscores in the repository name are left as they are.
	        new_name = new_name.replace('-', '/', 1)

	    return f"[{new_name}](https://huggingface.co/{new_name})"

	def generate_ORDER_LIST_LDEK_and_data_types(json_data):
	    """Build the column order and the matching list of column data types.

	    ``"model_name"`` and ``"overall_accuracy"`` always come first, in that
	    order; every other key of ``json_data`` follows in sorted order.

	    Args:
	        json_data: Mapping whose keys are the column names of one record.

	    Returns:
	        A ``(column_order, data_types)`` tuple of two lists of equal length.
	        ``data_types`` is ``"markdown"`` for the first column and ``"number"``
	        for every other column.
	    """
	    leading = ("model_name", "overall_accuracy")

	    # Everything that is not one of the two fixed columns, alphabetically.
	    trailing = sorted(name for name in json_data.keys() if name not in leading)

	    column_order = list(leading) + trailing
	    column_types = ["markdown", "number"] + ["number"] * len(trailing)
	    return column_order, column_types

	def filter_columns_ldek(column_choices):
	    """Return the chosen columns of the module-level ``LDEK_ACCS`` table.

	    Columns are returned in the order given by the module-level
	    ``ORDER_LIST_LDEK``, not in the order of ``column_choices``.

	    Args:
	        column_choices: Container of column names to keep.

	    Returns:
	        A ``pandas.DataFrame`` holding only the selected columns.
	    """
	    selected_columns = [col for col in ORDER_LIST_LDEK if col in column_choices]

	    # At module level LDEK_ACCS is rebound to the Styler produced by
	    # ``.style.highlight_max(...)``. A Styler cannot be indexed with a list
	    # of columns, so select from the DataFrame it wraps (``Styler.data``).
	    table = LDEK_ACCS if isinstance(LDEK_ACCS, pd.DataFrame) else LDEK_ACCS.data
	    return table[selected_columns]

	def load_json_data(file_path, ORDER_LIST_LDEK):
	    """Load a leaderboard JSON file into a DataFrame ready for display.

	    Args:
	        file_path: Location of the JSON file, passed to ``pandas.read_json``.
	        ORDER_LIST_LDEK: Column names in the desired output order; names not
	            present in the loaded data are skipped.

	    Returns:
	        A ``pandas.DataFrame`` restricted to the columns of
	        ``ORDER_LIST_LDEK``. Rows are sorted by ``overall_accuracy``,
	        highest first, when that column is among the kept columns.
	    """
	    table = pd.read_json(file_path)

	    # Any column holding at least one dict is converted to strings.
	    for name in table.columns:
	        holds_dict = table[name].apply(type).eq(dict).any()
	        if holds_dict:
	            table[name] = table[name].apply(str)

	    # Replace raw model identifiers with their display form.
	    table["model_name"] = table["model_name"].apply(replace_models_names)

	    # Round every numeric column to two decimals.
	    numeric_names = table.select_dtypes(include='number').columns
	    for name in numeric_names:
	        table[name] = table[name].round(2)

	    # Values that cannot be parsed as numbers become NaN.
	    table["overall_accuracy"] = pd.to_numeric(table["overall_accuracy"], errors='coerce')

	    kept = [col for col in ORDER_LIST_LDEK if col in table.columns]
	    table = table[kept]

	    if "overall_accuracy" not in table.columns:
	        return table
	    return table.sort_values(by="overall_accuracy", ascending=False)



	# --- Polish LDEK leaderboard -------------------------------------------
	# The first record of the JSON file supplies the column names.
	file_path = str(abs_path / "leaderboards/ldek_accs.json")
	with open(file_path, mode="r", encoding="utf-8") as file:
	    # The handle is not read from; read_json is given the path.
	    sample_data = pd.read_json(file_path).iloc[0].to_dict()

	ORDER_LIST_LDEK, DATA_TYPES_LDEK = generate_ORDER_LIST_LDEK_and_data_types(sample_data)
	COLUMN_HEADERS_LDEK = ORDER_LIST_LDEK

	# Load the table, then wrap it in a Styler that highlights the maximum of
	# every column except the first and formats values with precision 2.
	LDEK_ACCS = load_json_data(file_path, ORDER_LIST_LDEK)
	LDEK_ACCS = LDEK_ACCS.style.highlight_max(color=highlight_color, subset=LDEK_ACCS.columns[1:])
	LDEK_ACCS = LDEK_ACCS.format(precision=2)
	print(ORDER_LIST_LDEK)


	# --- English LDEK leaderboard ------------------------------------------
	file_path2 = str(abs_path / "leaderboards/ldek_en_accs.json")

	# Sample the first record of the *English* file (file_path2) so the column
	# order and data types describe the table loaded below. The handle opened
	# here is not read from; read_json is given the path.
	with open(file_path2, 'r', encoding='utf-8') as file:
	    sample_data2 = pd.read_json(file_path2).iloc[0].to_dict()

	ORDER_LIST_LDEK_EN, DATA_TYPES_LDEK_EN = generate_ORDER_LIST_LDEK_and_data_types(sample_data2)
	LDEK_EN_ACCS = load_json_data(file_path2, ORDER_LIST_LDEK_EN)

	# Wrap the table in a Styler that highlights the maximum of every column
	# except the first and formats values with precision 2.
	LDEK_EN_ACCS = LDEK_EN_ACCS.style.highlight_max(
	    color=highlight_color,
	    subset=LDEK_EN_ACCS.columns[1:]).format(precision=2)
	COLUMN_HEADERS_LDEK_EN = ORDER_LIST_LDEK_EN
	print(ORDER_LIST_LDEK_EN)