Weyaxi committed on
Commit
0c42d93
·
1 Parent(s): 7638fc7

trending feature

Browse files
Files changed (1) hide show
  1. app.py +62 -15
app.py CHANGED
@@ -77,7 +77,10 @@ def make_leaderboard(orgs, which_one):
77
  data_rows = []
78
  open_llm_leaderboard = get_openllm_leaderboard() if which_one == "models" else None
79
 
 
 
80
  for org in tqdm(orgs, desc=f"Scraping Organizations ({which_one})", position=0, leave=True):
 
81
  df = get_models(org, which_one)
82
  if len(df) == 0:
83
  continue
@@ -87,6 +90,7 @@ def make_leaderboard(orgs, which_one):
87
 
88
  if which_one == "models":
89
  open_llm_leaderboard_get_org = get_ranking(open_llm_leaderboard, org)
 
90
  data_rows.append({
91
  "Organization Name": org,
92
  "Total Downloads": sum_info["Downloads"],
@@ -99,9 +103,12 @@ def make_leaderboard(orgs, which_one):
99
  "Most Downloaded Model": most_info["Most Download"]["id"],
100
  "Most Download Count": most_info["Most Download"]["downloads"],
101
  "Most Liked Model": most_info["Most Likes"]["id"],
102
- "Most Like Count": most_info["Most Likes"]["likes"]
 
 
103
  })
104
  elif which_one == "datasets":
 
105
  data_rows.append({
106
  "Organization Name": org,
107
  "Total Downloads": sum_info["Downloads"],
@@ -112,17 +119,22 @@ def make_leaderboard(orgs, which_one):
112
  "Most Downloaded Dataset": most_info["Most Download"]["id"],
113
  "Most Download Count": most_info["Most Download"]["downloads"],
114
  "Most Liked Dataset": most_info["Most Likes"]["id"],
115
- "Most Like Count": most_info["Most Likes"]["likes"]
 
 
116
  })
117
 
118
  elif which_one == "spaces":
 
119
  data_rows.append({
120
  "Organization Name": org,
121
  "Total Likes": sum_info["Likes"],
122
  "Number of Spaces": num_things,
123
  "Average Likes per Space": int(sum_info["Likes"] / num_things) if num_things != 0 else 0,
124
  "Most Liked Space": most_info["Most Likes"]["id"],
125
- "Most Like Count": most_info["Most Likes"]["likes"]
 
 
126
  })
127
 
128
  leaderboard = pd.DataFrame(data_rows)
@@ -132,6 +144,9 @@ def make_leaderboard(orgs, which_one):
132
  leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
133
  return leaderboard
134
 
 
 
 
135
 
136
  with open("org_names.txt", "r") as f:
137
  org_names_in_list = [i.rstrip("\n") for i in f.readlines()]
@@ -139,7 +154,6 @@ with open("org_names.txt", "r") as f:
139
 
140
  INTRODUCTION_TEXT = f"""
141
  🎯 The Organization Leaderboard aims to track organization rankings. This space is inspired by the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
142
-
143
  ## Available Dataframes:
144
 
145
  - πŸ›οΈ Models
@@ -155,6 +169,8 @@ INTRODUCTION_TEXT = f"""
155
  πŸ› οΈ Organization names are retrieved using web scraping from [Huggingface Organizations](https://huggingface.co/organizations).
156
 
157
  **🌐 Note:** In the model's dataframe, there are some columns related to the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved through web scraping.
 
 
158
  """
159
 
160
  def clickable(x, which_one):
@@ -164,8 +180,10 @@ def clickable(x, which_one):
164
  else:
165
  return "Not Found"
166
  else:
167
- return f'<a target="_blank" href="https://huggingface.co/{which_one}/{x}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{x}</a>'
168
-
 
 
169
  def models_df_to_clickable(df, columns, which_one):
170
  for column in columns:
171
  if column == "Organization Name":
@@ -174,6 +192,34 @@ def models_df_to_clickable(df, columns, which_one):
174
  df[column] = df[column].apply(lambda x: clickable(x, which_one))
175
  return df
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  demo = gr.Blocks()
178
 
179
  with gr.Blocks() as demo:
@@ -182,28 +228,29 @@ with gr.Blocks() as demo:
182
 
183
  with gr.TabItem("πŸ›οΈ Models", id=1):
184
 
185
- columns_to_convert = ["Organization Name", "Best Model On Open LLM Leaderboard", "Most Downloaded Model", "Most Liked Model"]
186
  models_df = make_leaderboard(org_names_in_list, "models")
187
  models_df = models_df_to_clickable(models_df, columns_to_convert, "models")
188
 
189
- headers = ["πŸ”’ Serial Number", "🏒 Organization Name", "πŸ“₯ Total Downloads", "πŸ‘ Total Likes", "πŸ€– Number of Models", "πŸ† Best Model On Open LLM Leaderboard", "πŸ₯‡ Best Rank On Open LLM Leaderboard", "πŸ“Š Average Downloads per Model", "πŸ“ˆ Average Likes per Model", "πŸš€ Most Downloaded Model", "πŸ“ˆ Most Download Count", "❀️ Most Liked Model", "πŸ‘ Most Like Count"]
190
- gr.Dataframe(models_df.head(400), headers=headers, interactive=True, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str"])
191
 
192
  with gr.TabItem("πŸ“Š Datasets", id=2):
193
- columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset"]
194
  dataset_df = make_leaderboard(org_names_in_list, "datasets")
195
  dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")
196
 
197
- headers = ["πŸ”’ Serial Number", "🏒 Organization Name", "πŸ“₯ Total Downloads", "πŸ‘ Total Likes", "πŸ“Š Number of Datasets", "πŸ“Š Average Downloads per Dataset", "πŸ“ˆ Average Likes per Dataset", "πŸš€ Most Downloaded Dataset", "πŸ“ˆ Most Download Count", "❀️ Most Liked Dataset", "πŸ‘ Most Like Count"]
198
- gr.Dataframe(dataset_df.head(250), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "str", "str", "markdown", "str", "markdown", "str"])
199
 
200
  with gr.TabItem("πŸš€ Spaces", id=3):
201
- columns_to_convert = ["Organization Name", "Most Liked Space"]
202
 
203
  spaces_df = make_leaderboard(org_names_in_list, "spaces")
204
  spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")
205
 
206
- headers = ["πŸ”’ Serial Number", "🏒 Organization Name", "πŸ‘ Total Likes", "πŸš€ Number of Spaces", "πŸ“ˆ Average Likes per Space", "❀️ Most Liked Space", "πŸ‘ Most Like Count"]
207
- gr.Dataframe(spaces_df.head(150), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "markdown", "str"])
208
 
209
  demo.launch()
 
 
77
  data_rows = []
78
  open_llm_leaderboard = get_openllm_leaderboard() if which_one == "models" else None
79
 
80
+ trend = get_trending_list(1, which_one)
81
+
82
  for org in tqdm(orgs, desc=f"Scraping Organizations ({which_one})", position=0, leave=True):
83
+ rank = get_ranking_trend(trend, org)
84
  df = get_models(org, which_one)
85
  if len(df) == 0:
86
  continue
 
90
 
91
  if which_one == "models":
92
  open_llm_leaderboard_get_org = get_ranking(open_llm_leaderboard, org)
93
+
94
  data_rows.append({
95
  "Organization Name": org,
96
  "Total Downloads": sum_info["Downloads"],
 
103
  "Most Downloaded Model": most_info["Most Download"]["id"],
104
  "Most Download Count": most_info["Most Download"]["downloads"],
105
  "Most Liked Model": most_info["Most Likes"]["id"],
106
+ "Most Like Count": most_info["Most Likes"]["likes"],
107
+ "Trending Model": rank['id'],
108
+ "Best Rank at Trending Models": rank['rank']
109
  })
110
  elif which_one == "datasets":
111
+
112
  data_rows.append({
113
  "Organization Name": org,
114
  "Total Downloads": sum_info["Downloads"],
 
119
  "Most Downloaded Dataset": most_info["Most Download"]["id"],
120
  "Most Download Count": most_info["Most Download"]["downloads"],
121
  "Most Liked Dataset": most_info["Most Likes"]["id"],
122
+ "Most Like Count": most_info["Most Likes"]["likes"],
123
+ "Trending Dataset": rank['id'],
124
+ "Best Rank at Trending Datasets": rank['rank']
125
  })
126
 
127
  elif which_one == "spaces":
128
+
129
  data_rows.append({
130
  "Organization Name": org,
131
  "Total Likes": sum_info["Likes"],
132
  "Number of Spaces": num_things,
133
  "Average Likes per Space": int(sum_info["Likes"] / num_things) if num_things != 0 else 0,
134
  "Most Liked Space": most_info["Most Likes"]["id"],
135
+ "Most Like Count": most_info["Most Likes"]["likes"],
136
+ "Trending Space": rank['id'],
137
+ "Best Rank at Trending Spaces": rank['rank']
138
  })
139
 
140
  leaderboard = pd.DataFrame(data_rows)
 
144
  leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
145
  return leaderboard
146
 
147
+ """# Gradio başlasın
148
+
149
+ """
150
 
151
  with open("org_names.txt", "r") as f:
152
  org_names_in_list = [i.rstrip("\n") for i in f.readlines()]
 
154
 
155
  INTRODUCTION_TEXT = f"""
156
  🎯 The Organization Leaderboard aims to track organization rankings. This space is inspired by the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
 
157
  ## Available Dataframes:
158
 
159
  - πŸ›οΈ Models
 
169
  πŸ› οΈ Organization names are retrieved using web scraping from [Huggingface Organizations](https://huggingface.co/organizations).
170
 
171
  **🌐 Note:** In the model's dataframe, there are some columns related to the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved through web scraping.
172
+ *
173
+ *🌐 Note:** In trending models, first 300 models/datasets/spaces is being retrieved from huggingface.
174
  """
175
 
176
  def clickable(x, which_one):
 
180
  else:
181
  return "Not Found"
182
  else:
183
+ if x != "Not Found":
184
+ return f'<a target="_blank" href="https://huggingface.co/{which_one}/{x}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{x}</a>'
185
+ return "Not Found"
186
+
187
  def models_df_to_clickable(df, columns, which_one):
188
  for column in columns:
189
  if column == "Organization Name":
 
192
  df[column] = df[column].apply(lambda x: clickable(x, which_one))
193
  return df
194
 
195
+
196
def get_trending_list(pages, which_one):
    """Collect listing entries for ``which_one`` from huggingface.co.

    Requests ``https://huggingface.co/{which_one}-json?p=<page>`` for each
    page index in ``range(pages)`` and flattens the entries found under the
    ``which_one`` key of every JSON response into a single list, keeping the
    order in which the endpoint returned them.
    NOTE(review): presumably the endpoint's default ordering is "trending";
    that is not visible from this code -- confirm.

    Args:
        pages: Number of pages to request, starting at page index 0.
        which_one: Kind of listing; also used as the key of the JSON
            response. Any value other than ``"spaces"`` is expected to
            carry a ``downloads`` field on every entry.

    Returns:
        A list of dicts with the keys ``id``, ``downloads`` and ``likes``
        (``id`` and ``likes`` only when ``which_one == "spaces"``).

    Raises:
        requests.HTTPError: If a page answers with an error status.
        requests.Timeout: If a page does not answer in time.
    """
    trending_list = []
    for page in range(pages):
        # A timeout keeps a stalled request from blocking the caller forever.
        response = requests.get(
            f"https://huggingface.co/{which_one}-json?p={page}",
            timeout=30,
        )
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()

        for thing in response.json()[which_one]:
            entry = {"id": thing["id"]}
            # Entries of the "spaces" listing are recorded without downloads.
            if which_one != "spaces":
                entry["downloads"] = thing["downloads"]
            entry["likes"] = thing["likes"]
            trending_list.append(entry)

    return trending_list
213
+
214
def get_ranking_trend(json_data, org_name):
    """Find the best-placed entry of ``org_name`` in a ranked listing.

    The owner of an entry is the part of its ``id`` before the first ``/``
    (the whole ``id`` when it contains no ``/``). The listing is scanned in
    order and the first entry owned by ``org_name`` wins.

    Args:
        json_data: Sequence of dicts that each have an ``id`` key, in
            ranking order (best first).
        org_name: Owner name to look for.

    Returns:
        ``{"id": <entry id>, "rank": <1-based position>}`` for the first
        matching entry, or ``{"id": "Not Found", "rank": "Not Found"}`` when
        no entry belongs to ``org_name``.
    """
    # Single pass with early exit: this runs once per organization, so avoid
    # building full owner/id lists on every call.
    for rank, item in enumerate(json_data, start=1):
        if item["id"].split("/")[0] == org_name:
            return {"id": item["id"], "rank": rank}
    return {"id": "Not Found", "rank": "Not Found"}
222
+
223
  demo = gr.Blocks()
224
 
225
  with gr.Blocks() as demo:
 
228
 
229
  with gr.TabItem("πŸ›οΈ Models", id=1):
230
 
231
+ columns_to_convert = ["Organization Name", "Best Model On Open LLM Leaderboard", "Most Downloaded Model", "Most Liked Model", "Trending Model"]
232
  models_df = make_leaderboard(org_names_in_list, "models")
233
  models_df = models_df_to_clickable(models_df, columns_to_convert, "models")
234
 
235
+ headers = ["πŸ”’ Serial Number", "🏒 Organization Name", "πŸ“₯ Total Downloads", "πŸ‘ Total Likes", "πŸ€– Number of Models", "πŸ† Best Model On Open LLM Leaderboard", "πŸ₯‡ Best Rank On Open LLM Leaderboard", "πŸ“Š Average Downloads per Model", "πŸ“ˆ Average Likes per Model", "πŸš€ Most Downloaded Model", "πŸ“ˆ Most Download Count", "❀️ Most Liked Model", "πŸ‘ Most Like Count", "πŸ”₯ Trending Model", "πŸ‘‘ Best Rank at Trending Models"]
236
+ gr.Dataframe(models_df.head(400), headers=headers, interactive=True, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str", "markdown", "str"])
237
 
238
  with gr.TabItem("πŸ“Š Datasets", id=2):
239
+ columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset", "Trending Dataset"]
240
  dataset_df = make_leaderboard(org_names_in_list, "datasets")
241
  dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")
242
 
243
+ headers = ["πŸ”’ Serial Number", "🏒 Organization Name", "πŸ“₯ Total Downloads", "πŸ‘ Total Likes", "πŸ“Š Number of Datasets", "πŸ“Š Average Downloads per Dataset", "πŸ“ˆ Average Likes per Dataset", "πŸš€ Most Downloaded Dataset", "πŸ“ˆ Most Download Count", "❀️ Most Liked Dataset", "πŸ‘ Most Like Count", "πŸ”₯ Trending Dataset", "πŸ‘‘ Best Rank at Trending Datasets"]
244
+ gr.Dataframe(dataset_df.head(250), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "str", "str", "markdown", "str", "markdown", "str", "markdown", "str"])
245
 
246
  with gr.TabItem("πŸš€ Spaces", id=3):
247
+ columns_to_convert = ["Organization Name", "Most Liked Space", "Trending Space"]
248
 
249
  spaces_df = make_leaderboard(org_names_in_list, "spaces")
250
  spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")
251
 
252
+ headers = ["πŸ”’ Serial Number", "🏒 Organization Name", "πŸ‘ Total Likes", "πŸš€ Number of Spaces", "πŸ“ˆ Average Likes per Space", "❀️ Most Liked Space", "πŸ‘ Most Like Count", "πŸ”₯ Trending Space", "πŸ‘‘ Best Rank at Trending Spaces"]
253
+ gr.Dataframe(spaces_df.head(150), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str"])
254
 
255
  demo.launch()
256
+