Spaces:
Running
Running
Joschka Strueber
committed on
Commit
·
e64ca4e
1
Parent(s):
5815cf9
[Add] cache loading data from hf
Browse files
- src/dataloading.py +6 -5
- src/similarity.py +2 -2
src/dataloading.py
CHANGED
@@ -3,6 +3,7 @@ import numpy as np
|
|
3 |
from huggingface_hub import HfApi
|
4 |
|
5 |
from functools import lru_cache
|
|
|
6 |
|
7 |
|
8 |
def get_leaderboard_models():
|
@@ -17,9 +18,7 @@ def get_leaderboard_models():
|
|
17 |
dataset_id = dataset.id
|
18 |
try:
|
19 |
# Check if the dataset can be loaded
|
20 |
-
print(dataset_id)
|
21 |
check_gated = datasets.get_dataset_config_names(dataset_id)
|
22 |
-
print(check_gated)
|
23 |
# Format: "open-llm-leaderboard/<provider>__<model_name>-details"
|
24 |
model_part = dataset_id.split("/")[-1].replace("-details", "")
|
25 |
if "__" in model_part:
|
@@ -27,7 +26,7 @@ def get_leaderboard_models():
|
|
27 |
models.append(f"{provider}/{model}")
|
28 |
else:
|
29 |
models.append(model_part)
|
30 |
-
except
|
31 |
pass
|
32 |
|
33 |
return sorted(models)
|
@@ -77,6 +76,7 @@ def filter_labels(doc):
|
|
77 |
return labels
|
78 |
|
79 |
|
|
|
80 |
def load_run_data(model_name, dataset_name):
|
81 |
try:
|
82 |
model_name = model_name.replace("/", "__")
|
@@ -104,6 +104,7 @@ def load_run_data(model_name, dataset_name):
|
|
104 |
return log_probs, labels
|
105 |
|
106 |
|
107 |
-
|
108 |
-
|
|
|
109 |
|
|
|
3 |
from huggingface_hub import HfApi
|
4 |
|
5 |
from functools import lru_cache
|
6 |
+
from datasets.exceptions import DatasetNotFoundError
|
7 |
|
8 |
|
9 |
def get_leaderboard_models():
|
|
|
18 |
dataset_id = dataset.id
|
19 |
try:
|
20 |
# Check if the dataset can be loaded
|
|
|
21 |
check_gated = datasets.get_dataset_config_names(dataset_id)
|
|
|
22 |
# Format: "open-llm-leaderboard/<provider>__<model_name>-details"
|
23 |
model_part = dataset_id.split("/")[-1].replace("-details", "")
|
24 |
if "__" in model_part:
|
|
|
26 |
models.append(f"{provider}/{model}")
|
27 |
else:
|
28 |
models.append(model_part)
|
29 |
+
except DatasetNotFoundError as e:
|
30 |
pass
|
31 |
|
32 |
return sorted(models)
|
|
|
76 |
return labels
|
77 |
|
78 |
|
79 |
+
|
80 |
def load_run_data(model_name, dataset_name):
|
81 |
try:
|
82 |
model_name = model_name.replace("/", "__")
|
|
|
104 |
return log_probs, labels
|
105 |
|
106 |
|
107 |
+
@lru_cache(maxsize=8)
|
108 |
+
def load_run_data_cached(model_name, dataset_name):
|
109 |
+
return load_run_data(model_name, dataset_name)
|
110 |
|
src/similarity.py
CHANGED
@@ -2,7 +2,7 @@ import numpy as np
|
|
2 |
|
3 |
from lmsim.metrics import Metrics, CAPA, EC
|
4 |
|
5 |
-
from src.dataloading import
|
6 |
from src.utils import softmax, one_hot
|
7 |
|
8 |
def load_data_and_compute_similarities(models: list[str], dataset: str, metric_name: str) -> np.array:
|
@@ -10,7 +10,7 @@ def load_data_and_compute_similarities(models: list[str], dataset: str, metric_n
|
|
10 |
probs = []
|
11 |
gts = []
|
12 |
for model in models:
|
13 |
-
model_probs, model_gt =
|
14 |
probs.append(model_probs)
|
15 |
gts.append(model_gt)
|
16 |
|
|
|
2 |
|
3 |
from lmsim.metrics import Metrics, CAPA, EC
|
4 |
|
5 |
+
from src.dataloading import load_run_data_cached
|
6 |
from src.utils import softmax, one_hot
|
7 |
|
8 |
def load_data_and_compute_similarities(models: list[str], dataset: str, metric_name: str) -> np.array:
|
|
|
10 |
probs = []
|
11 |
gts = []
|
12 |
for model in models:
|
13 |
+
model_probs, model_gt = load_run_data_cached(model, dataset)
|
14 |
probs.append(model_probs)
|
15 |
gts.append(model_gt)
|
16 |
|