Spaces:
Runtime error
Runtime error
trending feature
Browse files
app.py
CHANGED
@@ -77,7 +77,10 @@ def make_leaderboard(orgs, which_one):
|
|
77 |
data_rows = []
|
78 |
open_llm_leaderboard = get_openllm_leaderboard() if which_one == "models" else None
|
79 |
|
|
|
|
|
80 |
for org in tqdm(orgs, desc=f"Scraping Organizations ({which_one})", position=0, leave=True):
|
|
|
81 |
df = get_models(org, which_one)
|
82 |
if len(df) == 0:
|
83 |
continue
|
@@ -87,6 +90,7 @@ def make_leaderboard(orgs, which_one):
|
|
87 |
|
88 |
if which_one == "models":
|
89 |
open_llm_leaderboard_get_org = get_ranking(open_llm_leaderboard, org)
|
|
|
90 |
data_rows.append({
|
91 |
"Organization Name": org,
|
92 |
"Total Downloads": sum_info["Downloads"],
|
@@ -99,9 +103,12 @@ def make_leaderboard(orgs, which_one):
|
|
99 |
"Most Downloaded Model": most_info["Most Download"]["id"],
|
100 |
"Most Download Count": most_info["Most Download"]["downloads"],
|
101 |
"Most Liked Model": most_info["Most Likes"]["id"],
|
102 |
-
"Most Like Count": most_info["Most Likes"]["likes"]
|
|
|
|
|
103 |
})
|
104 |
elif which_one == "datasets":
|
|
|
105 |
data_rows.append({
|
106 |
"Organization Name": org,
|
107 |
"Total Downloads": sum_info["Downloads"],
|
@@ -112,17 +119,22 @@ def make_leaderboard(orgs, which_one):
|
|
112 |
"Most Downloaded Dataset": most_info["Most Download"]["id"],
|
113 |
"Most Download Count": most_info["Most Download"]["downloads"],
|
114 |
"Most Liked Dataset": most_info["Most Likes"]["id"],
|
115 |
-
"Most Like Count": most_info["Most Likes"]["likes"]
|
|
|
|
|
116 |
})
|
117 |
|
118 |
elif which_one == "spaces":
|
|
|
119 |
data_rows.append({
|
120 |
"Organization Name": org,
|
121 |
"Total Likes": sum_info["Likes"],
|
122 |
"Number of Spaces": num_things,
|
123 |
"Average Likes per Space": int(sum_info["Likes"] / num_things) if num_things != 0 else 0,
|
124 |
"Most Liked Space": most_info["Most Likes"]["id"],
|
125 |
-
"Most Like Count": most_info["Most Likes"]["likes"]
|
|
|
|
|
126 |
})
|
127 |
|
128 |
leaderboard = pd.DataFrame(data_rows)
|
@@ -132,6 +144,9 @@ def make_leaderboard(orgs, which_one):
|
|
132 |
leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
|
133 |
return leaderboard
|
134 |
|
|
|
|
|
|
|
135 |
|
136 |
with open("org_names.txt", "r") as f:
|
137 |
org_names_in_list = [i.rstrip("\n") for i in f.readlines()]
|
@@ -139,7 +154,6 @@ with open("org_names.txt", "r") as f:
|
|
139 |
|
140 |
INTRODUCTION_TEXT = f"""
|
141 |
π― The Organization Leaderboard aims to track organization rankings. This space is inspired by the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
|
142 |
-
|
143 |
## Available Dataframes:
|
144 |
|
145 |
- ποΈ Models
|
@@ -155,6 +169,8 @@ INTRODUCTION_TEXT = f"""
|
|
155 |
π οΈ Organization names are retrieved using web scraping from [Huggingface Organizations](https://huggingface.co/organizations).
|
156 |
|
157 |
**π Note:** In the model's dataframe, there are some columns related to the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved through web scraping.
|
|
|
|
|
158 |
"""
|
159 |
|
160 |
def clickable(x, which_one):
|
@@ -164,8 +180,10 @@ def clickable(x, which_one):
|
|
164 |
else:
|
165 |
return "Not Found"
|
166 |
else:
|
167 |
-
|
168 |
-
|
|
|
|
|
169 |
def models_df_to_clickable(df, columns, which_one):
|
170 |
for column in columns:
|
171 |
if column == "Organization Name":
|
@@ -174,6 +192,34 @@ def models_df_to_clickable(df, columns, which_one):
|
|
174 |
df[column] = df[column].apply(lambda x: clickable(x, which_one))
|
175 |
return df
|
176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
demo = gr.Blocks()
|
178 |
|
179 |
with gr.Blocks() as demo:
|
@@ -182,28 +228,29 @@ with gr.Blocks() as demo:
|
|
182 |
|
183 |
with gr.TabItem("ποΈ Models", id=1):
|
184 |
|
185 |
-
columns_to_convert = ["Organization Name", "Best Model On Open LLM Leaderboard", "Most Downloaded Model", "Most Liked Model"]
|
186 |
models_df = make_leaderboard(org_names_in_list, "models")
|
187 |
models_df = models_df_to_clickable(models_df, columns_to_convert, "models")
|
188 |
|
189 |
-
headers = ["π’ Serial Number", "π’ Organization Name", "π₯ Total Downloads", "π Total Likes", "π€ Number of Models", "π Best Model On Open LLM Leaderboard", "π₯ Best Rank On Open LLM Leaderboard", "π Average Downloads per Model", "π Average Likes per Model", "π Most Downloaded Model", "π Most Download Count", "β€οΈ Most Liked Model", "π Most Like Count"]
|
190 |
-
gr.Dataframe(models_df.head(400), headers=headers, interactive=True, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str"])
|
191 |
|
192 |
with gr.TabItem("π Datasets", id=2):
|
193 |
-
columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset"]
|
194 |
dataset_df = make_leaderboard(org_names_in_list, "datasets")
|
195 |
dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")
|
196 |
|
197 |
-
headers = ["π’ Serial Number", "π’ Organization Name", "π₯ Total Downloads", "π Total Likes", "π Number of Datasets", "π Average Downloads per Dataset", "π Average Likes per Dataset", "π Most Downloaded Dataset", "π Most Download Count", "β€οΈ Most Liked Dataset", "π Most Like Count"]
|
198 |
-
gr.Dataframe(dataset_df.head(250), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "str", "str", "markdown", "str", "markdown", "str"])
|
199 |
|
200 |
with gr.TabItem("π Spaces", id=3):
|
201 |
-
columns_to_convert = ["Organization Name", "Most Liked Space"]
|
202 |
|
203 |
spaces_df = make_leaderboard(org_names_in_list, "spaces")
|
204 |
spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")
|
205 |
|
206 |
-
headers = ["π’ Serial Number", "π’ Organization Name", "π Total Likes", "π Number of Spaces", "π Average Likes per Space", "β€οΈ Most Liked Space", "π Most Like Count"]
|
207 |
-
gr.Dataframe(spaces_df.head(150), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "markdown", "str"])
|
208 |
|
209 |
demo.launch()
|
|
|
|
77 |
data_rows = []
|
78 |
open_llm_leaderboard = get_openllm_leaderboard() if which_one == "models" else None
|
79 |
|
80 |
+
trend = get_trending_list(1, which_one)
|
81 |
+
|
82 |
for org in tqdm(orgs, desc=f"Scraping Organizations ({which_one})", position=0, leave=True):
|
83 |
+
rank = get_ranking_trend(trend, org)
|
84 |
df = get_models(org, which_one)
|
85 |
if len(df) == 0:
|
86 |
continue
|
|
|
90 |
|
91 |
if which_one == "models":
|
92 |
open_llm_leaderboard_get_org = get_ranking(open_llm_leaderboard, org)
|
93 |
+
|
94 |
data_rows.append({
|
95 |
"Organization Name": org,
|
96 |
"Total Downloads": sum_info["Downloads"],
|
|
|
103 |
"Most Downloaded Model": most_info["Most Download"]["id"],
|
104 |
"Most Download Count": most_info["Most Download"]["downloads"],
|
105 |
"Most Liked Model": most_info["Most Likes"]["id"],
|
106 |
+
"Most Like Count": most_info["Most Likes"]["likes"],
|
107 |
+
"Trending Model": rank['id'],
|
108 |
+
"Best Rank at Trending Models": rank['rank']
|
109 |
})
|
110 |
elif which_one == "datasets":
|
111 |
+
|
112 |
data_rows.append({
|
113 |
"Organization Name": org,
|
114 |
"Total Downloads": sum_info["Downloads"],
|
|
|
119 |
"Most Downloaded Dataset": most_info["Most Download"]["id"],
|
120 |
"Most Download Count": most_info["Most Download"]["downloads"],
|
121 |
"Most Liked Dataset": most_info["Most Likes"]["id"],
|
122 |
+
"Most Like Count": most_info["Most Likes"]["likes"],
|
123 |
+
"Trending Dataset": rank['id'],
|
124 |
+
"Best Rank at Trending Datasets": rank['rank']
|
125 |
})
|
126 |
|
127 |
elif which_one == "spaces":
|
128 |
+
|
129 |
data_rows.append({
|
130 |
"Organization Name": org,
|
131 |
"Total Likes": sum_info["Likes"],
|
132 |
"Number of Spaces": num_things,
|
133 |
"Average Likes per Space": int(sum_info["Likes"] / num_things) if num_things != 0 else 0,
|
134 |
"Most Liked Space": most_info["Most Likes"]["id"],
|
135 |
+
"Most Like Count": most_info["Most Likes"]["likes"],
|
136 |
+
"Trending Space": rank['id'],
|
137 |
+
"Best Rank at Trending Spaces": rank['rank']
|
138 |
})
|
139 |
|
140 |
leaderboard = pd.DataFrame(data_rows)
|
|
|
144 |
leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
|
145 |
return leaderboard
|
146 |
|
147 |
+
"""# Gradio başlasın
|
148 |
+
|
149 |
+
"""
|
150 |
|
151 |
with open("org_names.txt", "r") as f:
|
152 |
org_names_in_list = [i.rstrip("\n") for i in f.readlines()]
|
|
|
154 |
|
155 |
INTRODUCTION_TEXT = f"""
|
156 |
π― The Organization Leaderboard aims to track organization rankings. This space is inspired by the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
|
|
|
157 |
## Available Dataframes:
|
158 |
|
159 |
- ποΈ Models
|
|
|
169 |
π οΈ Organization names are retrieved using web scraping from [Huggingface Organizations](https://huggingface.co/organizations).
|
170 |
|
171 |
**π Note:** In the model's dataframe, there are some columns related to the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved through web scraping.
|
172 |
+
*
|
173 |
+
*π Note:** In trending models, first 300 models/datasets/spaces is being retrieved from huggingface.
|
174 |
"""
|
175 |
|
176 |
def clickable(x, which_one):
|
|
|
180 |
else:
|
181 |
return "Not Found"
|
182 |
else:
|
183 |
+
if x != "Not Found":
|
184 |
+
return f'<a target="_blank" href="https://huggingface.co/{which_one}/{x}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{x}</a>'
|
185 |
+
return "Not Found"
|
186 |
+
|
187 |
def models_df_to_clickable(df, columns, which_one):
|
188 |
for column in columns:
|
189 |
if column == "Organization Name":
|
|
|
192 |
df[column] = df[column].apply(lambda x: clickable(x, which_one))
|
193 |
return df
|
194 |
|
195 |
+
|
196 |
+
def get_trending_list(pages, which_one, timeout=30):
    """Fetch the first ``pages`` pages of a Hugging Face listing endpoint.

    Args:
        pages: Number of listing pages to request. Pages are requested with
            ``p=0`` through ``p=pages - 1``; a value of 0 (or less) performs
            no request and yields an empty list.
        which_one: Listing name. It is used both to build the URL
            (``https://huggingface.co/{which_one}-json``) and as the key under
            which the entries are read from the JSON payload.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of dicts, in the order the endpoint returned them. Every dict
        has ``"id"`` and ``"likes"``; unless ``which_one`` is ``"spaces"`` it
        also has ``"downloads"``.

    Raises:
        requests.exceptions.RequestException: If a request fails or exceeds
            ``timeout``.
        KeyError: If the payload or one of its entries lacks an expected key.
    """
    # Only the "spaces" listing is read without a download counter.
    include_downloads = which_one != "spaces"

    trending_list = []
    for page in range(pages):
        # requests never times out by default; without an explicit timeout a
        # stalled connection would block the caller forever.
        response = requests.get(
            f"https://huggingface.co/{which_one}-json?p={page}",
            timeout=timeout,
        )

        for thing in response.json()[which_one]:
            entry = {"id": thing["id"]}
            if include_downloads:
                entry["downloads"] = thing["downloads"]
            entry["likes"] = thing["likes"]
            trending_list.append(entry)

    return trending_list
|
213 |
+
|
214 |
+
def get_ranking_trend(json_data, org_name):
    """Find the first entry in ``json_data`` that belongs to ``org_name``.

    An entry belongs to an organization when the part of its ``"id"`` before
    the first ``"/"`` equals ``org_name`` (an id without ``"/"`` is compared
    as a whole).

    Args:
        json_data: Sequence of dicts, each with an ``"id"`` string.
        org_name: Organization name to look for.

    Returns:
        ``{"id": <full id>, "rank": <1-based position>}`` for the first
        matching entry, or ``{"id": "Not Found", "rank": "Not Found"}`` when
        no entry matches.
    """
    full_ids = [entry["id"] for entry in json_data]
    owners = [full_id.split("/")[0] for full_id in full_ids]

    # Ranks are 1-based positions within the supplied list.
    for position, (owner, full_id) in enumerate(zip(owners, full_ids), start=1):
        if owner == org_name:
            return {"id": full_id, "rank": position}

    return {"id": "Not Found", "rank": "Not Found"}
|
222 |
+
|
223 |
demo = gr.Blocks()
|
224 |
|
225 |
with gr.Blocks() as demo:
|
|
|
228 |
|
229 |
with gr.TabItem("ποΈ Models", id=1):
|
230 |
|
231 |
+
columns_to_convert = ["Organization Name", "Best Model On Open LLM Leaderboard", "Most Downloaded Model", "Most Liked Model", "Trending Model"]
|
232 |
models_df = make_leaderboard(org_names_in_list, "models")
|
233 |
models_df = models_df_to_clickable(models_df, columns_to_convert, "models")
|
234 |
|
235 |
+
headers = ["π’ Serial Number", "π’ Organization Name", "π₯ Total Downloads", "π Total Likes", "π€ Number of Models", "π Best Model On Open LLM Leaderboard", "π₯ Best Rank On Open LLM Leaderboard", "π Average Downloads per Model", "π Average Likes per Model", "π Most Downloaded Model", "π Most Download Count", "β€οΈ Most Liked Model", "π Most Like Count", "π₯ Trending Model", "π Best Rank at Trending Models"]
|
236 |
+
gr.Dataframe(models_df.head(400), headers=headers, interactive=True, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str", "markdown", "str"])
|
237 |
|
238 |
with gr.TabItem("π Datasets", id=2):
|
239 |
+
columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset", "Trending Dataset"]
|
240 |
dataset_df = make_leaderboard(org_names_in_list, "datasets")
|
241 |
dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")
|
242 |
|
243 |
+
headers = ["π’ Serial Number", "π’ Organization Name", "π₯ Total Downloads", "π Total Likes", "π Number of Datasets", "π Average Downloads per Dataset", "π Average Likes per Dataset", "π Most Downloaded Dataset", "π Most Download Count", "β€οΈ Most Liked Dataset", "π Most Like Count", "π₯ Trending Dataset", "π Best Rank at Trending Datasets"]
|
244 |
+
gr.Dataframe(dataset_df.head(250), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "str", "str", "markdown", "str", "markdown", "str", "markdown", "str"])
|
245 |
|
246 |
with gr.TabItem("π Spaces", id=3):
|
247 |
+
columns_to_convert = ["Organization Name", "Most Liked Space", "Trending Space"]
|
248 |
|
249 |
spaces_df = make_leaderboard(org_names_in_list, "spaces")
|
250 |
spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")
|
251 |
|
252 |
+
headers = ["π’ Serial Number", "π’ Organization Name", "π Total Likes", "π Number of Spaces", "π Average Likes per Space", "β€οΈ Most Liked Space", "π Most Like Count", "π₯ Trending Space", "π Best Rank at Trending Spaces"]
|
253 |
+
gr.Dataframe(spaces_df.head(150), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str"])
|
254 |
|
255 |
demo.launch()
|
256 |
+
|