Weyaxi committed on
Commit
fec983f
·
1 Parent(s): d2fc390
Files changed (1) hide show
  1. app.py +94 -113
app.py CHANGED
@@ -9,45 +9,40 @@ import gradio as gr
9
 
10
  api = HfApi()
11
 
12
-
13
  def get_models(org_name, which_one):
14
- all_list = []
15
- if which_one == "models":
16
- things = api.list_models(author=org_name)
17
- elif which_one == "datasets":
18
- things = api.list_datasets(author=org_name)
19
- elif which_one == "spaces":
20
- things = api.list_spaces(author=org_name)
21
 
22
- for i in things:
23
- i = i.__dict__
24
- json_format_data = {"id": i['id'], "downloads": i['downloads'],
25
- "likes": i['likes']} if which_one != "spaces" else {"id": i['id'], "downloads": 0, "likes": i['likes']}
26
 
27
- all_list.append(json_format_data)
28
 
29
- df_all_list = (pd.DataFrame(all_list))
30
 
31
- return df_all_list
32
 
 
33
 
34
  def get_most(df_for_most_function):
35
- download_sorted_df = df_for_most_function.sort_values(by=['downloads'], ascending=False)
36
- most_downloaded = download_sorted_df.iloc[0]
37
 
38
- like_sorted_df = df_for_most_function.sort_values(by=['likes'], ascending=False)
39
- most_liked = like_sorted_df.iloc[0]
40
-
41
- return {"Most Download": {"id": most_downloaded['id'], "downloads": most_downloaded['downloads'], "likes": most_downloaded['likes']},
42
- "Most Likes": {"id": most_liked['id'], "downloads": most_liked['downloads'], "likes": most_liked['likes']}}
43
 
 
44
 
45
  def get_sum(df_for_sum_function):
46
- sum_downloads = sum(df_for_sum_function['downloads'].tolist())
47
- sum_likes = sum(df_for_sum_function['likes'].tolist())
48
-
49
- return {"Downloads": sum_downloads, "Likes": sum_likes}
50
 
 
51
 
52
  def get_openllm_leaderboard():
53
  url = 'https://huggingfaceh4-open-llm-leaderboard.hf.space/'
@@ -72,14 +67,12 @@ def get_openllm_leaderboard():
72
  except (IndexError, AttributeError):
73
  return result_list
74
 
75
-
76
  def get_ranking(model_list, target_org):
77
  for index, model in enumerate(model_list):
78
- if model.split("/")[0].lower() == target_org.lower():
79
- return [index + 1, model]
80
  return "Not Found"
81
 
82
-
83
  def make_leaderboard(orgs, which_one):
84
  data_rows = []
85
  open_llm_leaderboard = get_openllm_leaderboard() if which_one == "models" else None
@@ -87,61 +80,64 @@ def make_leaderboard(orgs, which_one):
87
  for org in tqdm(orgs, desc=f"Scraping Organizations ({which_one})", position=0, leave=True):
88
  df = get_models(org, which_one)
89
  if len(df) == 0:
90
- continue
91
  num_things = len(df)
92
  sum_info = get_sum(df)
93
  most_info = get_most(df)
94
 
95
  if which_one == "models":
96
- open_llm_leaderboard_get_org = get_ranking(open_llm_leaderboard, org)
97
- data_rows.append({
98
- "Organization Name": org,
99
- "Total Downloads": sum_info["Downloads"],
100
- "Total Likes": sum_info["Likes"],
101
- "Number of Models": num_things,
102
- "Best Model On Open LLM Leaderboard": open_llm_leaderboard_get_org[1] if open_llm_leaderboard_get_org != "Not Found" else open_llm_leaderboard_get_org,
103
- "Best Rank On Open LLM Leaderboard": open_llm_leaderboard_get_org[0] if open_llm_leaderboard_get_org != "Not Found" else open_llm_leaderboard_get_org,
104
- "Average Downloads per Model": int(sum_info["Downloads"] / num_things) if num_things != 0 else 0,
105
- "Average Likes per Model": int(sum_info["Likes"] / num_things) if num_things != 0 else 0,
106
- "Most Downloaded Model": most_info["Most Download"]["id"],
107
- "Most Download Count": most_info["Most Download"]["downloads"],
108
- "Most Liked Model": most_info["Most Likes"]["id"],
109
- "Most Like Count": most_info["Most Likes"]["likes"]
110
- })
111
  elif which_one == "datasets":
112
- data_rows.append({
113
- "Organization Name": org,
114
- "Total Downloads": sum_info["Downloads"],
115
- "Total Likes": sum_info["Likes"],
116
- "Number of Datasets": num_things,
117
- "Average Downloads per Dataset": int(sum_info["Downloads"] / num_things) if num_things != 0 else 0,
118
- "Average Likes per Dataset": int(sum_info["Likes"] / num_things) if num_things != 0 else 0,
119
- "Most Downloaded Dataset": most_info["Most Download"]["id"],
120
- "Most Download Count": most_info["Most Download"]["downloads"],
121
- "Most Liked Dataset": most_info["Most Likes"]["id"],
122
- "Most Like Count": most_info["Most Likes"]["likes"]
123
- })
124
 
125
  elif which_one == "spaces":
126
- data_rows.append({
127
- "Organization Name": org,
128
- "Total Likes": sum_info["Likes"],
129
- "Number of Spaces": num_things,
130
- "Average Likes per Space": int(sum_info["Likes"] / num_things) if num_things != 0 else 0,
131
- "Most Liked Space": most_info["Most Likes"]["id"],
132
- "Most Like Count": most_info["Most Likes"]["likes"]
133
- })
134
 
135
  leaderboard = pd.DataFrame(data_rows)
136
- leaderboard = leaderboard.sort_values(by=["Total Downloads"] if which_one != "spaces" else ["Total Likes"], ascending=False)
 
 
137
  leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
138
  return leaderboard
139
 
140
 
141
  with open("org_names.txt", "r") as f:
142
- org_names_in_list = [i.rstrip("\n") for i in f.readlines()]
143
 
144
- markdown_main_text = f"""
 
145
  🎯 The Organization Leaderboard aims to track organizations ranking. This space is inspired by [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
146
 
147
  ## Dataframes Available:
@@ -162,68 +158,53 @@ markdown_main_text = f"""
162
 
163
  """
164
 
165
-
166
  def clickable(x, which_one):
167
  if which_one == "models":
168
- if x != "Not Found":
169
- return f'<a target="_blank" href="https://huggingface.co/{x}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{x}</a>'
170
- else:
171
- return "Not Found"
172
  else:
173
  return f'<a target="_blank" href="https://huggingface.co/{which_one}/{x}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{x}</a>'
174
 
175
-
176
- def df_to_clickable(df, columns, which_one):
177
  for column in columns:
178
  if column == "Organization Name":
179
- df[column] = df[column].apply(lambda x: clickable(x, "models"))
180
  else:
181
- df[column] = df[column].apply(lambda x: clickable(x, which_one))
182
  return df
183
 
 
184
 
185
  with gr.Blocks() as demo:
186
- gr.Markdown("""<h1 align="center" id="space-title">πŸ€— Organization Leaderboard</h1>""")
187
- gr.Markdown(markdown_main_text, elem_classes="markdown-text")
188
-
189
- with gr.TabItem("πŸ›οΈ Models", id=1):
190
- columns_to_convert = ["Organization Name", "Best Model On Open LLM Leaderboard", "Most Downloaded Model", "Most Liked Model"]
191
-
192
- models_df = make_leaderboard(org_names_in_list, "models")
193
- models_df = df_to_clickable(models_df, columns_to_convert, "models")
194
-
195
- headers = ["πŸ”’ Serial Number", "🏒 Organization Name", "πŸ“₯ Total Downloads", "πŸ‘ Total Likes", "πŸ€– Number of Models",
196
- "πŸ† Best Model On Open LLM Leaderboard", "πŸ₯‡ Best Rank On Open LLM Leaderboard",
197
- "πŸ“Š Average Downloads per Model", "πŸ“ˆ Average Likes per Model", "πŸš€ Most Downloaded Model",
198
- "πŸ“ˆ Most Download Count", "❀ Most Liked Model", "πŸ‘ Most Like Count"]
199
 
200
- gr.Dataframe(models_df, headers=headers, interactive=True,
201
- datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "str", "str", "markdown",
202
- "str", "markdown", "str"])
203
 
204
- with gr.TabItem("πŸ“Š Dataset", id=2):
205
- columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset"]
206
- dataset_df = make_leaderboard(org_names_in_list, "datasets")
207
- dataset_df = df_to_clickable(dataset_df, columns_to_convert, "datasets")
208
 
209
- headers = ["πŸ”’ Serial Number", "🏒 Organization Name", "πŸ“₯ Total Downloads", "πŸ‘ Total Likes",
210
- "πŸ“Š Number of Datasets", "πŸ“Š Average Downloads per Dataset", "πŸ“ˆ Average Likes per Dataset",
211
- "πŸš€ Most Downloaded Dataset", "πŸ“ˆ Most Download Count", "❀ Most Liked Dataset", "πŸ‘ Most Like Count"]
212
 
213
- gr.Dataframe(dataset_df, headers=headers, interactive=False,
214
- datatype=["str", "markdown", "str", "str", "str", "str", "str", "markdown", "str", "markdown",
215
- "str"])
 
216
 
217
- with gr.TabItem("πŸš€ Spaces", id=3):
218
- columns_to_convert = ["Organization Name", "Most Liked Space"]
219
 
220
- spaces_df = make_leaderboard(org_names_in_list, "spaces")
221
- spaces_df = df_to_clickable(spaces_df, columns_to_convert, "spaces")
222
 
223
- headers = ["πŸ”’ Serial Number", "🏒 Organization Name", "πŸ‘ Total Likes", "πŸš€ Number of Spaces",
224
- "πŸ“ˆ Average Likes per Space", "❀ Most Liked Space", "πŸ‘ Most Like Count"]
225
 
226
- gr.Dataframe(spaces_df, headers=headers, interactive=False,
227
- datatype=["str", "markdown", "str", "str", "str", "markdown", "str"])
228
 
229
- demo.launch()
 
9
 
10
  api = HfApi()
11
 
 
12
  def get_models(org_name, which_one):
13
+ all_list = []
14
+ if which_one == "models":
15
+ things = api.list_models(author=org_name)
16
+ elif which_one == "datasets":
17
+ things = api.list_datasets(author=org_name)
18
+ elif which_one == "spaces":
19
+ things = api.list_spaces(author=org_name)
20
 
21
+ for i in things:
22
+ i = i.__dict__
23
+ json_format_data = {"id": i['id'], "downloads": i['downloads'], "likes": i['likes']} if which_one != "spaces" else {"id": i['id'], "downloads": 0, "likes": i['likes']}
 
24
 
25
+ all_list.append(json_format_data)
26
 
 
27
 
28
+ df_all_list = (pd.DataFrame(all_list))
29
 
30
+ return df_all_list
31
 
32
  def get_most(df_for_most_function):
33
+ download_sorted_df = df_for_most_function.sort_values(by=['downloads'], ascending=False)
34
+ most_downloaded = download_sorted_df.iloc[0]
35
 
36
+ like_sorted_df = df_for_most_function.sort_values(by=['likes'], ascending=False)
37
+ most_liked = like_sorted_df.iloc[0]
 
 
 
38
 
39
+ return {"Most Download": {"id": most_downloaded['id'], "downloads": most_downloaded['downloads'], "likes": most_downloaded['likes']}, "Most Likes": {"id": most_liked['id'], "downloads": most_liked['downloads'], "likes": most_liked['likes']}}
40
 
41
  def get_sum(df_for_sum_function):
42
+ sum_downloads = sum(df_for_sum_function['downloads'].tolist())
43
+ sum_likes = sum(df_for_sum_function['likes'].tolist())
 
 
44
 
45
+ return {"Downloads": sum_downloads, "Likes": sum_likes}
46
 
47
  def get_openllm_leaderboard():
48
  url = 'https://huggingfaceh4-open-llm-leaderboard.hf.space/'
 
67
  except (IndexError, AttributeError):
68
  return result_list
69
 
 
70
  def get_ranking(model_list, target_org):
71
  for index, model in enumerate(model_list):
72
+ if model.split("/")[0].lower() == target_org.lower():
73
+ return [index+1, model]
74
  return "Not Found"
75
 
 
76
  def make_leaderboard(orgs, which_one):
77
  data_rows = []
78
  open_llm_leaderboard = get_openllm_leaderboard() if which_one == "models" else None
 
80
  for org in tqdm(orgs, desc=f"Scraping Organizations ({which_one})", position=0, leave=True):
81
  df = get_models(org, which_one)
82
  if len(df) == 0:
83
+ continue
84
  num_things = len(df)
85
  sum_info = get_sum(df)
86
  most_info = get_most(df)
87
 
88
  if which_one == "models":
89
+ open_llm_leaderboard_get_org = get_ranking(open_llm_leaderboard, org)
90
+ data_rows.append({
91
+ "Organization Name": org,
92
+ "Total Downloads": sum_info["Downloads"],
93
+ "Total Likes": sum_info["Likes"],
94
+ "Number of Models": num_things,
95
+ "Best Model On Open LLM Leaderboard": open_llm_leaderboard_get_org[1] if open_llm_leaderboard_get_org != "Not Found" else open_llm_leaderboard_get_org,
96
+ "Best Rank On Open LLM Leaderboard": open_llm_leaderboard_get_org[0] if open_llm_leaderboard_get_org != "Not Found" else open_llm_leaderboard_get_org,
97
+ "Average Downloads per Model": int(sum_info["Downloads"] / num_things) if num_things != 0 else 0,
98
+ "Average Likes per Model": int(sum_info["Likes"] / num_things) if num_things != 0 else 0,
99
+ "Most Downloaded Model": most_info["Most Download"]["id"],
100
+ "Most Download Count": most_info["Most Download"]["downloads"],
101
+ "Most Liked Model": most_info["Most Likes"]["id"],
102
+ "Most Like Count": most_info["Most Likes"]["likes"]
103
+ })
104
  elif which_one == "datasets":
105
+ data_rows.append({
106
+ "Organization Name": org,
107
+ "Total Downloads": sum_info["Downloads"],
108
+ "Total Likes": sum_info["Likes"],
109
+ "Number of Datasets": num_things,
110
+ "Average Downloads per Dataset": int(sum_info["Downloads"] / num_things) if num_things != 0 else 0,
111
+ "Average Likes per Dataset": int(sum_info["Likes"] / num_things) if num_things != 0 else 0,
112
+ "Most Downloaded Dataset": most_info["Most Download"]["id"],
113
+ "Most Download Count": most_info["Most Download"]["downloads"],
114
+ "Most Liked Dataset": most_info["Most Likes"]["id"],
115
+ "Most Like Count": most_info["Most Likes"]["likes"]
116
+ })
117
 
118
  elif which_one == "spaces":
119
+ data_rows.append({
120
+ "Organization Name": org,
121
+ "Total Likes": sum_info["Likes"],
122
+ "Number of Spaces": num_things,
123
+ "Average Likes per Space": int(sum_info["Likes"] / num_things) if num_things != 0 else 0,
124
+ "Most Liked Space": most_info["Most Likes"]["id"],
125
+ "Most Like Count": most_info["Most Likes"]["likes"]
126
+ })
127
 
128
  leaderboard = pd.DataFrame(data_rows)
129
+ temp = ["Total Downloads"] if which_one != "spaces" else ["Total Likes"]
130
+
131
+ leaderboard = leaderboard.sort_values(by=temp, ascending=False)
132
  leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
133
  return leaderboard
134
 
135
 
136
  with open("org_names.txt", "r") as f:
137
+ org_names_in_list = [i.rstrip("\n") for i in f.readlines()]
138
 
139
+
140
+ INTRODUCTION_TEXT = f"""
141
  🎯 The Organization Leaderboard aims to track organizations ranking. This space is inspired by [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
142
 
143
  ## Dataframes Available:
 
158
 
159
  """
160
 
 
161
  def clickable(x, which_one):
162
  if which_one == "models":
163
+ if x != "Not Found":
164
+ return f'<a target="_blank" href="https://huggingface.co/{x}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{x}</a>'
165
+ else:
166
+ return "Not Found"
167
  else:
168
  return f'<a target="_blank" href="https://huggingface.co/{which_one}/{x}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{x}</a>'
169
 
170
+ def models_df_to_clickable(df, columns, which_one):
 
171
  for column in columns:
172
  if column == "Organization Name":
173
+ df[column] = df[column].apply(lambda x: clickable(x, "models"))
174
  else:
175
+ df[column] = df[column].apply(lambda x: clickable(x, which_one))
176
  return df
177
 
178
# Build the three-tab UI. Each leaderboard is scraped while its tab is being
# laid out, so all the network work happens here, before the app is launched.
# (The former standalone `demo = gr.Blocks()` was dropped: the `with` statement
# below rebinds `demo` immediately, so that instance was never used.)
with gr.Blocks() as demo:
    gr.Markdown("""<h1 align="center" id="space-title">🤗 Organization Leaderboard</h1>""")
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.TabItem("🏛️ Models", id=1):
        columns_to_convert = ["Organization Name", "Best Model On Open LLM Leaderboard", "Most Downloaded Model", "Most Liked Model"]
        models_df = make_leaderboard(org_names_in_list, "models")
        models_df = models_df_to_clickable(models_df, columns_to_convert, "models")

        # Headers and datatypes are positional: they must follow the column
        # order produced by make_leaderboard ("markdown" marks the link columns).
        headers = [
            "🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes",
            "🤖 Number of Models", "🏆 Best Model On Open LLM Leaderboard",
            "🥇 Best Rank On Open LLM Leaderboard", "📊 Average Downloads per Model",
            "📈 Average Likes per Model", "🚀 Most Downloaded Model", "📈 Most Download Count",
            "❤️ Most Liked Model", "👍 Most Like Count",
        ]
        # NOTE(review): this is the only table created with interactive=True;
        # the dataset and space tables use interactive=False. Confirm intent.
        gr.Dataframe(
            models_df, headers=headers, interactive=True,
            datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "str", "str",
                      "markdown", "str", "markdown", "str"],
        )

    with gr.TabItem("📊 Dataset", id=2):
        columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset"]
        dataset_df = make_leaderboard(org_names_in_list, "datasets")
        dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")

        headers = [
            "🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes",
            "📊 Number of Datasets", "📊 Average Downloads per Dataset",
            "📈 Average Likes per Dataset", "🚀 Most Downloaded Dataset",
            "📈 Most Download Count", "❤️ Most Liked Dataset", "👍 Most Like Count",
        ]
        gr.Dataframe(
            dataset_df, headers=headers, interactive=False,
            datatype=["str", "markdown", "str", "str", "str", "str", "str", "markdown", "str",
                      "markdown", "str"],
        )

    with gr.TabItem("🚀 Spaces", id=3):
        columns_to_convert = ["Organization Name", "Most Liked Space"]
        spaces_df = make_leaderboard(org_names_in_list, "spaces")
        spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")

        headers = [
            "🔢 Serial Number", "🏢 Organization Name", "👍 Total Likes", "🚀 Number of Spaces",
            "📈 Average Likes per Space", "❤️ Most Liked Space", "👍 Most Like Count",
        ]
        gr.Dataframe(
            spaces_df, headers=headers, interactive=False,
            datatype=["str", "markdown", "str", "str", "str", "markdown", "str"],
        )

demo.launch(share=True, debug=True)