John6666 committed (verified)
Commit a8f6f3c · 1 Parent(s): 2303139

Upload 8 files

Files changed (6):
  1. README.md +2 -2
  2. app.py +81 -18
  3. hfconstants.py +7 -0
  4. hfsearch.py +263 -68
  5. subtags.json +0 -0
  6. tags.json +0 -0
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-title: test warm models
-emoji: 🙄
+title: Search HF🤗 Inference API warm models
+emoji: 🤗🔍
 colorFrom: indigo
 colorTo: purple
 sdk: gradio
app.py CHANGED
@@ -1,36 +1,99 @@
 import spaces
 import gradio as gr
-from hfsearch import HFSearchResult, search, update_filter, update_df
+from hfsearch import (HFSearchResult, search, update_filter, update_df, get_labels, get_valid_labels,
+                      get_tags, get_subtag_categories, update_subtag_items, update_tags, update_subtags,
+                      DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES)
 
-with gr.Blocks(fill_width=True) as demo:
+CSS = """
+.title { align-items: center; text-align: center; }
+.info { align-items: center; text-align: center; }
+"""
+
+with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
+    gr.Markdown("# Search Hugging Face🤗", elem_classes="title")
     with gr.Column():
         search_result = gr.State(value=HFSearchResult())
-        with gr.Group():
-            with gr.Row(equal_height=True):
-                infer_status = gr.Radio(label="Inference status", choices=["warm", "cold", "frozen", "all"], value="warm")
-                gated_status = gr.Radio(label="Gated status", choices=["gated", "non-gated", "all"], value="non-gated")
-                appr_status = gr.CheckboxGroup(label="Approval method", choices=["auto", "manual"], value=["auto", "manual"])
-            with gr.Accordion("Advanced", open=False):
+        with gr.Tab("Normal Search"):
+            with gr.Group():
                 with gr.Row(equal_height=True):
-                    filter = gr.Textbox(label="Query", value="")
-                    author = gr.Textbox(label="Author", value="")
-                    sort = gr.Radio(label="Sort", choices=["last_modified", "likes", "downloads"], value="likes")
-                    sort_method = gr.Radio(label="Sort method", choices=["ascending order", "descending order"], value="ascending order")
-                    limit = gr.Number(label="Limit", info="If 0, fetches all models", value=1000, step=1, minimum=0, maximum=10000000)
-        run_button = gr.Button("Search", variant="primary")
+                    repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space"], value=["model", "dataset", "space"])
+                with gr.Accordion("Advanced", open=False):
+                    with gr.Row(equal_height=True):
+                        filter_str = gr.Textbox(label="Filter", info="String(s) to filter repos", value="")
+                        search_str = gr.Textbox(label="Search", info="A string that will be contained in the returned repo ids", placeholder="bert", value="", lines=1)
+                        author = gr.Textbox(label="Author", info="The author (user or organization)", value="", lines=1)
+                    with gr.Column():
+                        tags = gr.Textbox(label="Tags", info="Tag(s) to filter repos", value="")
+                        with gr.Accordion("Tag input assistance", open=False):
+                            with gr.Row(equal_height=True):
+                                tag_item = gr.Dropdown(label="Item", choices=get_tags(), value=get_tags()[0], allow_custom_value=True, scale=4)
+                                tag_btn = gr.Button("Add", scale=1)
+                            with gr.Row(equal_height=True):
+                                subtag_cat = gr.Dropdown(label="Category", choices=get_subtag_categories(), value=get_subtag_categories()[0], scale=2)
+                                subtag_item = gr.Dropdown(label="Item", choices=[""], value="", allow_custom_value=True, scale=2)
+                                subtug_btn = gr.Button("Add", scale=1)
+                    with gr.Column():
+                        gated_status = gr.Radio(label="Gated status", choices=["gated", "non-gated", "all"], value="all")
+                        appr_status = gr.CheckboxGroup(label="Approval method", choices=["auto", "manual"], value=["auto", "manual"])
+                with gr.Tab("for Models"):
+                    with gr.Column():
+                        infer_status = gr.Radio(label="Inference status", choices=["warm", "cold", "frozen", "all"], value="all")
+                        gr.Markdown("[About the Inference API status (Warm, Cold, Frozen)](https://huggingface.co/docs/api-inference/supported-models)", elem_classes="info")
+                    # with gr.Row(equal_height=True):
+                    #     model_task = gr.Textbox(label="Task", info="String(s) of tasks models were designed for", placeholder="fill-mask", value="")
+                    #     trained_dataset = gr.Textbox(label="Trained dataset", info="Trained dataset for a model", value="")
+                with gr.Tab("for Datasets"):
+                    size_categories = gr.CheckboxGroup(label="Size categories", info="The size of the dataset", choices=DS_SIZE_CATEGORIES, value=[])
+                    # task_categories = gr.Textbox(label="Task categories", info="Identify datasets by the designed task", value="")
+                    # task_ids = gr.Textbox(label="Task IDs", info="Identify datasets by the specific task", value="")
+                    # language_creators = gr.Textbox(label="Language creators", info="Identify datasets with how the data was curated", value="")
+                    # language = gr.Textbox(label="Language", info="String(s) representing two-character language to filter datasets by", value="")
+                    # multilinguality = gr.Textbox(label="Multilinguality", info="String(s) representing a filter for datasets that contain multiple languages", value="")
+                with gr.Tab("for Spaces"):
+                    with gr.Row(equal_height=True):
+                        hardware = gr.CheckboxGroup(label="Specify hardware", choices=SPACE_HARDWARES, value=[])
+                        stage = gr.CheckboxGroup(label="Specify stage", choices=SPACE_STAGES, value=[])
+                with gr.Row(equal_height=True):
+                    sort = gr.Radio(label="Sort", choices=["last_modified", "likes", "downloads", "trending_score"], value="likes")
+                    sort_method = gr.Radio(label="Sort method", choices=["ascending order", "descending order"], value="ascending order")
+                    limit = gr.Number(label="Limit", info="If 0, fetches all models", value=1000, step=1, minimum=0, maximum=10000000)
+                    fetch_detail = gr.CheckboxGroup(label="Fetch detail", choices=["Space Runtime"], value=["Space Runtime"])
+                with gr.Row(equal_height=True):
+                    show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
+            run_button = gr.Button("Search", variant="primary")
+        with gr.Tab("Find Serverless Inference API enabled models"):
+            with gr.Group():
+                with gr.Row(equal_height=True):
+                    infer_repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space"], value=["model"], visible=False)
+                with gr.Column():
+                    infer_infer_status = gr.Radio(label="Inference status", choices=["warm", "cold", "frozen", "all"], value="warm")
+                    gr.Markdown("[About the Inference API status (Warm, Cold, Frozen)](https://huggingface.co/docs/api-inference/supported-models)", elem_classes="info")
+                with gr.Column():
+                    infer_gated_status = gr.Radio(label="Gated status", choices=["gated", "non-gated", "all"], value="all")
+                    infer_appr_status = gr.CheckboxGroup(label="Approval method", choices=["auto", "manual"], value=["auto", "manual"])
+            infer_run_button = gr.Button("Search", variant="primary")
         with gr.Group():
             with gr.Accordion("Filter", open=False):
-                hide_item = gr.CheckboxGroup(label="Hide items", choices=[], value=[], visible=False)
+                hide_labels = gr.CheckboxGroup(label="Hide items", choices=[], value=[], visible=False)
                 with gr.Row(equal_height=True):
                     filter_item1 = gr.Dropdown(label="Filter item", choices=[""], value="", visible=False)
                     filter1 = gr.Dropdown(label="Filter", choices=[""], value="", allow_custom_value=True, visible=False)
                     filter_btn = gr.Button("Apply filter", variant="secondary", visible=False)
-        result_df = gr.DataFrame(label="Results", type="array", value=[[]], interactive=False)
+        result_df = gr.DataFrame(label="Results", type="pandas", value=None, interactive=False)
 
-    run_button.click(search, [sort, sort_method, filter, author, infer_status, gated_status, appr_status, limit, search_result], [result_df, hide_item, search_result])\
+    run_button.click(search, [repo_types, sort, sort_method, filter_str, search_str, author, tags, infer_status, gated_status, appr_status,
+                              size_categories, limit, hardware, stage, fetch_detail, show_labels, search_result],
+                     [result_df, hide_labels, search_result])\
+        .success(update_filter, [filter_item1, search_result], [filter_item1, filter1, filter_btn, search_result], queue=False)
+    infer_run_button.click(search, [infer_repo_types, sort, sort_method, filter_str, search_str, author, tags, infer_infer_status, infer_gated_status, infer_appr_status,
+                                    size_categories, limit, hardware, stage, fetch_detail, show_labels, search_result],
+                           [result_df, hide_labels, search_result])\
         .success(update_filter, [filter_item1, search_result], [filter_item1, filter1, filter_btn, search_result], queue=False)
-    gr.on(triggers=[hide_item.change, filter_btn.click], fn=update_df, inputs=[hide_item, filter_item1, filter1, search_result],
+    gr.on(triggers=[hide_labels.change, filter_btn.click], fn=update_df, inputs=[hide_labels, filter_item1, filter1, search_result],
          outputs=[result_df, search_result], trigger_mode="once", queue=False, show_api=False)
     filter_item1.change(update_filter, [filter_item1, search_result], [filter_item1, filter1, filter_btn, search_result], queue=False, show_api=False)
+    subtag_cat.change(update_subtag_items, [subtag_cat], [subtag_item], queue=False, show_api=False)
+    subtug_btn.click(update_subtags, [tags, subtag_cat, subtag_item], [tags], queue=False, show_api=False)
+    tag_btn.click(update_tags, [tags, tag_item], [tags], queue=False, show_api=False)
 
 demo.queue().launch()
hfconstants.py ADDED
@@ -0,0 +1,7 @@
+
+DS_SIZE_CATEGORIES = ["n<1K", "1K<n<10K", "10K<n<100K", "100K<n<1M", "1M<n<10M", "10M<n<100M",
+                      "100M<n<1B", "1B<n<10B", "10B<n<100B", "100B<n<1T", "n>1T"]
+
+SPACE_HARDWARES = ["cpu-basic", "zero-a10g", "cpu-upgrade", "t4-small", "l4x1", "a10g-large", "l40sx1", "a10g-small", "t4-medium", "cpu-xl", "a100-large"]
+
+SPACE_STAGES = ["RUNNING", "SLEEPING", "RUNTIME_ERROR", "PAUSED", "BUILD_ERROR", "CONFIG_ERROR", "BUILDING", "APP_STARTING", "RUNNING_APP_STARTING"]
hfsearch.py CHANGED
@@ -1,7 +1,82 @@
 import spaces
 import gradio as gr
-from huggingface_hub import HfApi
+from huggingface_hub import HfApi, ModelInfo, DatasetInfo, SpaceInfo
+from typing import Union
 import gc
+import pandas as pd
+import datetime
+import json
+import re
+from hfconstants import DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES
+
+@spaces.GPU
+def dummy_gpu():
+    pass
+
+RESULT_ITEMS = {
+    "Type": [1, "str", True],
+    "ID": [2, "markdown", True, "40%"],
+    "Status": [4, "markdown", True],
+    "Gated": [6, "str", True],
+    "Likes": [10, "number", True],
+    "DLs": [12, "number", True],
+    "AllDLs": [13, "number", False],
+    "Trending": [16, "number", True],
+    "LastMod.": [17, "str", True],
+    "Library": [20, "markdown", False],
+    "Pipeline": [21, "markdown", True],
+    "Hardware": [25, "str", False],
+    "Stage": [26, "str", False],
+    "NFAA": [40, "str", False],
+}
+
+try:
+    with open("tags.json", encoding="utf-8") as f:
+        TAGS = json.load(f)
+    with open("subtags.json", encoding="utf-8") as f:
+        SUBTAGS = json.load(f)
+except Exception as e:
+    TAGS = []
+    SUBTAGS = {}
+    print(e)
+
+def get_tags():
+    return TAGS[0:1000]
+
+def get_subtag_categories():
+    return list(SUBTAGS.keys())
+
+def update_subtag_items(category: str):
+    choices=[""] + list(SUBTAGS.get(category, []))
+    return gr.update(choices=choices, value=choices[0])
+
+def update_subtags(tags: str, category: str, item: str):
+    addtag = f"{category}:{item}" if item else ""
+    newtags = f"{tags}\n{addtag}" if tags else addtag
+    return newtags
+
+def update_tags(tags: str, item: str):
+    newtags = f"{tags}\n{item}" if tags else item
+    return newtags
+
+def str_to_list(s: str):
+    try:
+        m = re.split("\n", s)
+        return [s.strip() for s in list(m)]
+    except Exception:
+        return []
+
+def is_valid_arg(s: str):
+    return len(str_to_list(s)) > 0
+
+def get_labels():
+    return list(RESULT_ITEMS.keys())
+
+def get_valid_labels():
+    return [k for k in list(RESULT_ITEMS.keys()) if RESULT_ITEMS[k][2]]
+
+def date_to_str(dt: datetime.datetime):
+    return dt.strftime('%Y-%m-%d %H:%M')
 
 class Labels():
     VALID_DTYPE = ["str", "number", "bool", "date", "markdown"]
@@ -9,65 +84,210 @@ class Labels():
     def __init__(self):
         self.types = {}
         self.orders = {}
+        self.widths = {}
 
-    def set(self, label: str, type: str="str", order: int=255):
-        if type not in self.VALID_DTYPE: raise Exception(f"Invalid data type: {type}")
-        self.types[label] = type
-        self.orders[label] = order
+    def set(self, label: str):
+        if not label in RESULT_ITEMS.keys(): raise Exception(f"Invalid item: {label}")
+        item = RESULT_ITEMS.get(label)
+        if item[1] not in self.VALID_DTYPE: raise Exception(f"Invalid data type: {type}")
+        self.types[label] = item[1]
+        self.orders[label] = item[0]
+        if len(item) > 3: self.widths[label] = item[3]
+        else: self.widths[label] = "10%"
 
     def get(self):
         labels = list(self.types.keys())
         labels.sort(key=lambda x: self.orders[x])
         label_types = [self.types[s] for s in labels]
         return labels, label_types
-
+
+    def get_widths(self):
+        labels = list(self.types.keys())
+        label_widths = [self.widths[s] for s in labels]
+        return label_widths
+
     def get_null_value(self, type: str):
         if type == "bool": return False
        elif type == "number" or type == "date": return 0
         else: return "None"
 
+# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api
+# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api#huggingface_hub.ModelInfo
 class HFSearchResult():
     def __init__(self):
         self.labels = Labels()
         self.current_item = {}
-        self.current_show_item = {}
+        self.current_item_info = None
         self.item_list = []
-        self.show_item_list = []
+        self.item_info_list = []
         self.item_hide_flags = []
-        self.hide_item = []
+        self.hide_labels = []
+        self.show_labels = []
         self.filter_items = None
         self.filters = None
         gc.collect()
 
     def reset(self):
         self.__init__()
-
-    def set(self, data, label: str, type: str="str", order: int=255, show_data=None):
-        self.labels.set(label, type, order)
+
+    def _set(self, data, label: str):
+        self.labels.set(label)
         self.current_item[label] = data
-        if show_data is not None: self.current_show_item[label] = show_data
-
-    def next(self):
+
+    def _next(self):
         self.item_list.append(self.current_item.copy())
         self.current_item = {}
-        self.show_item_list.append(self.current_show_item.copy())
-        self.current_show_item = {}
+        self.item_info_list.append(self.current_item_info)
+        self.current_item_info = None
+        self.item_hide_flags.append(False)
 
-    def get(self):
-        labels, label_types = self.labels.get()
-        df = [[item.get(l, self.labels.get_null_value(t)) for l, t in zip(labels, label_types)] for item in self.item_list]
-        return df, labels, label_types
+    def add_item(self, i: Union[ModelInfo, DatasetInfo, SpaceInfo]):
+        self.current_item_info = i
+        if isinstance(i, ModelInfo): type = "model"
+        elif isinstance(i, DatasetInfo): type = "dataset"
+        elif isinstance(i, SpaceInfo): type = "space"
+        else: return
+        self._set(type, "Type")
+        self._set(i.id, "ID")
+        if i.likes is not None: self._set(i.likes, "Likes")
+        if i.last_modified is not None: self._set(date_to_str(i.last_modified), "LastMod.")
+        if i.trending_score is not None: self._set(int(i.trending_score), "Trending")
+        if i.tags is not None: self._set("True" if "not-for-all-audiences" in i.tags else "False", "NFAA")
+        if type in ["model", "dataset"]:
+            if i.gated is not None: self._set(i.gated if i.gated else "off", "Gated")
+            if i.downloads is not None: self._set(i.downloads, "DLs")
+            if i.downloads_all_time is not None: self._set(i.downloads_all_time, "AllDLs")
+        if type == "model":
+            if i.inference is not None: self._set(i.inference, "Status")
+            if i.library_name is not None: self._set(i.library_name, "Library")
+            if i.pipeline_tag is not None: self._set(i.pipeline_tag, "Pipeline")
+        if type == "space":
+            if i.runtime is not None:
+                self._set(i.runtime.hardware, "Hardware")
+                self._set(i.runtime.stage, "Stage")
+        self._next()
+
+    def search(self, repo_types: list, sort: str, sort_method: str, filter_str: str, search_str: str, author: str, tags: str, infer: str, gated: str, appr: list[str],
+               size_categories: list, limit: int, hardware: list, stage: list, fetch_detail: list, show_labels: list):
+        try:
+            self.reset()
+            self.show_labels = show_labels.copy()
+            api = HfApi()
+            kwargs = {}
+            mkwargs = {}
+            dkwargs = {}
+            skwargs = {}
+            if filter_str: kwargs["filter"] = str_to_list(filter_str)
+            if search_str: kwargs["search"] = search_str
+            if author: kwargs["author"] = author
+            if tags and is_valid_arg(tags):
+                mkwargs["tags"] = str_to_list(tags)
+                dkwargs["tags"] = str_to_list(tags)
+            if limit > 0: kwargs["limit"] = limit
+            if sort_method == "descending order": kwargs["direction"] = -1
+            if gated == "gated":
+                mkwargs["gated"] = True
+                dkwargs["gated"] = True
+            elif gated == "non-gated":
+                mkwargs["gated"] = False
+                dkwargs["gated"] = False
+            mkwargs["sort"] = sort
+            if len(size_categories) > 0: dkwargs["size_categories"] = size_categories
+            if infer != "all": mkwargs["inference"] = infer
+            if "model" in repo_types:
+                models = api.list_models(full=True, cardData=True, **kwargs, **mkwargs)
+                for model in models:
+                    if model.gated is not None and model.gated and model.gated not in appr: continue
+                    self.add_item(model)
+            if "dataset" in repo_types:
+                datasets = api.list_datasets(full=True, **kwargs, **dkwargs)
+                for dataset in datasets:
+                    if dataset.gated is not None and dataset.gated and dataset.gated not in appr: continue
+                    self.add_item(dataset)
+            if "space" in repo_types:
+                if "Space Runtime" in fetch_detail:
+                    spaces = api.list_spaces(expand=["cardData", "datasets", "disabled", "lastModified", "createdAt",
+                                                     "likes", "models", "private", "runtime", "sdk", "sha", "tags", "trendingScore"], **kwargs, **skwargs)
+                else: spaces = api.list_spaces(full=True, **kwargs, **skwargs)
+                for space in spaces:
+                    if space.gated is not None and space.gated and space.gated not in appr: continue
+                    if space.runtime is not None:
+                        if len(hardware) > 0 and space.runtime.stage == "RUNNING" and space.runtime.hardware not in hardware: continue
+                        if len(stage) > 0 and space.runtime.stage not in stage: continue
+                    self.add_item(space)
+            if sort == "downloads" and ("space" not in repo_types): self.sort("DLs")
+            elif sort == "downloads_all_time" and ("space" not in repo_types): self.sort("AllDLs")
+            elif sort == "likes": self.sort("Likes")
+            elif sort == "trending_score": self.sort("Trending")
+            else: self.sort("LastMod.")
+        except Exception as e:
+            raise Exception(f"Search error: {e}") from e
 
-    def get_show(self):
+    def get(self):
         labels, label_types = self.labels.get()
         self._do_filter()
-        df = [[show_item.get(l, self.labels.get_null_value(t)) if l in show_item.keys() else item.get(l, self.labels.get_null_value(t)) for l, t in zip(labels, label_types) if l not in set(self.hide_item)] for item, show_item, is_hide in zip(self.item_list, self.show_item_list, self.item_hide_flags) if not is_hide]
-        show_label_types = [t for l, t in zip(labels, label_types) if l not in self.hide_item]
-        show_labels = [l for l in labels if l not in self.hide_item]
+        dflist = [[item.get(l, self.labels.get_null_value(t)) for l, t in zip(labels, label_types)] for item, is_hide in zip(self.item_list, self.item_hide_flags) if not is_hide]
+        df = self._to_pandas(dflist, labels)
+        show_label_types = [t for l, t in zip(labels, label_types) if l not in self.hide_labels and l in self.show_labels]
+        show_labels = [l for l in labels if l not in self.hide_labels and l in self.show_labels]
         return df, show_labels, show_label_types
 
-    def set_hide(self, hide_item: list):
-        self.hide_item = hide_item
+    def _to_pandas(self, dflist: list, labels: list):
+        # https://pandas.pydata.org/docs/reference/api/pandas.io.formats.style.Styler.apply.html
+        # https://stackoverflow.com/questions/41654949/pandas-style-function-to-highlight-specific-columns
+        # https://stackoverflow.com/questions/69832206/pandas-styling-with-conditional-rules
+        # https://stackoverflow.com/questions/41203959/conditionally-format-python-pandas-cell
+        # https://stackoverflow.com/questions/51187868/how-do-i-remove-and-re-sort-reindex-columns-after-applying-style-in-python-pan
+        # https://stackoverflow.com/questions/36921951/truth-value-of-a-series-is-ambiguous-use-a-empty-a-bool-a-item-a-any-o
+        def rank_df(sdf: pd.DataFrame, df: pd.DataFrame, col: str):
+            ranks = [(0.5, "gold"), (0.75, "orange"), (0.9, "orangered")]
+            for t, color in ranks:
+                sdf.loc[df[col] >= df[col].quantile(q=t), [col]] = f'color: {color}'
+            return sdf
+
+        def highlight_df(x: pd.DataFrame, df: pd.DataFrame):
+            sdf = pd.DataFrame("", index=x.copy().index, columns=x.copy().columns)
+            columns = df.columns
+            if "Trending" in columns: sdf = rank_df(sdf, df, "Trending")
+            if "Likes" in columns: sdf = rank_df(sdf, df, "Likes")
+            if "AllDLs" in columns: sdf = rank_df(sdf, df, "AllDLs")
+            if "DLs" in columns: sdf = rank_df(sdf, df, "DLs")
+            if "Status" in columns:
+                sdf.loc[df["Status"] == "warm", ["Type"]] = 'color: orange'
+                sdf.loc[df["Status"] == "cold", ["Type"]] = 'color: dodgerblue'
+            if "Gated" in columns:
+                sdf.loc[df["Gated"] == "auto", ["Gated"]] = 'color: dodgerblue'
+                sdf.loc[df["Gated"] == "manual", ["Gated"]] = 'color: crimson'
+            if "Stage" in columns and "Hardware" in columns:
+                sdf.loc[(df["Stage"] == "RUNNING") & (df["Hardware"] != "zero-a10g") & (df["Hardware"] != "cpu-basic") & (df["Hardware"] != "None") & (df["Hardware"]), ["Hardware", "Type"]] = 'color: lime'
+                sdf.loc[(df["Stage"] == "RUNNING") & (df["Hardware"] == "zero-a10g"), ["Hardware", "Type"]] = 'color: green'
+                sdf.loc[(df["Type"] == "space") & (df["Stage"] != "RUNNING")] = 'opacity: 0.5'
+                sdf.loc[(df["Type"] == "space") & (df["Stage"] != "RUNNING"), ["Type"]] = 'color: crimson'
+                sdf.loc[df["Stage"] == "RUNNING", ["Stage"]] = 'color: lime'
+            if "NFAA" in columns: sdf.loc[df["NFAA"] == "True", ["Type"]] = 'background-color: hotpink'
+            show_columns = x.copy().columns
+            style_columns = sdf.columns
+            drop_columns = [c for c in style_columns if c not in show_columns]
+            sdf = sdf.drop(drop_columns, axis=1)
+            return sdf
+
+        def id_to_md(df: pd.DataFrame):
+            if df["Type"] == "dataset": return f'[{df["ID"]}](https://hf.co/datasets/{df["ID"]})'
+            elif df["Type"] == "space": return f'[{df["ID"]}](https://hf.co/spaces/{df["ID"]})'
+            else: return f'[{df["ID"]}](https://hf.co/{df["ID"]})'
+
+        def format_md_df(df: pd.DataFrame):
+            df["ID"] = df.apply(id_to_md, axis=1)
+            return df
+
+        hide_labels = [l for l in labels if l in self.hide_labels or l not in self.show_labels]
+        df = format_md_df(pd.DataFrame(dflist, columns=labels))
+        ref_df = df.copy()
+        df = df.drop(hide_labels, axis=1).style.apply(highlight_df, axis=None, df=ref_df)
+        return df
+
+    def set_hide(self, hide_labels: list):
+        self.hide_labels = hide_labels.copy()
 
     def set_filter(self, filter_item1: str, filter1: str):
         if not filter_item1 and not filter1:
@@ -100,11 +320,17 @@
             flags.append(flag)
         self.item_hide_flags = flags
 
+    def sort(self, key="Likes"):
+        if len(self.item_list) == 0: raise Exception("No item found.")
+        if not key in self.labels.get()[0]: key = "Likes"
+        self.item_list, self.item_hide_flags, self.item_info_list = zip(*sorted(zip(self.item_list, self.item_hide_flags, self.item_info_list), key=lambda x: x[0][key], reverse=True))
+
     def get_gr_df(self):
-        df, labels, label_types = self.get_show()
-        return gr.update(type="array", value=df, headers=labels, datatype=label_types)
+        df, labels, label_types = self.get()
+        widths = self.labels.get_widths()
+        return gr.update(type="pandas", value=df, headers=labels, datatype=label_types, column_widths=widths, wrap=True)
 
-    def get_gr_hide_item(self):
+    def get_gr_hide_labels(self):
         return gr.update(choices=self.labels.get()[0], value=[], visible=True)
 
     def get_gr_filter_item(self, filter_item: str=""):
@@ -124,48 +350,17 @@ class HFSearchResult():
             else: d[v] = 1
         return gr.update(choices=[""] + [t[0] for t in sorted(d.items(), key=lambda x : x[1])][:100], value="", visible=True)
 
-def md_lb(s: str, count: int):
-    return "<br>".join([s[i:i+count] for i in range(0, len(s), count)])
-
-# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api
-# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api#huggingface_hub.ModelInfo
-@spaces.GPU
-def search(sort: str, sort_method: str, filter: str, author: str, infer: str, gated: str, appr: list[str], limit: int, r: HFSearchResult):
+def search(repo_types: list, sort: str, sort_method: str, filter_str: str, search_str: str, author: str, tags: str, infer: str,
+           gated: str, appr: list[str], size_categories: list, limit: int, hardware: list, stage: list, fetch_detail: list, show_labels: list, r: HFSearchResult):
     try:
-        api = HfApi()
-        kwargs = {}
-        if filter: kwargs["filter"] = filter
-        if author: kwargs["author"] = author
-        if gated == "gated": kwargs["gated"] = True
-        elif gated == "non-gated": kwargs["gated"] = False
-        if infer != "all": kwargs["inference"] = infer
-        if sort_method == "descending order": kwargs["direction"] = -1
-        if limit > 0: kwargs["limit"] = limit
-        models = api.list_models(sort=sort, cardData=True, full=True, **kwargs)
-        r.reset()
-        i = 1
-        for model in models:
-            if model.gated is not None and model.gated and model.gated not in appr: continue
-            r.set(i, "No.", "number", 0)
-            r.set(model.id, "Model", "markdown", 2, f"[{md_lb(model.id, 48)}](https://hf.co/{model.id})")
-            if model.inference is not None: r.set(model.inference, "Status", "markdown", 4, md_lb(model.inference, 8))
-            #if infer != "all": r.set(infer, "Status", "markdown", 4)
-            if model.gated is not None: r.set(model.gated if model.gated else "off", "Gated", "str", 6)
-            #if gated != "all": r.set("on" if gated == "gated" else "off", "Gated", "str", 6)
-            if model.library_name is not None: r.set(model.library_name, "Library", "markdown", 10, md_lb(model.library_name, 12))
-            if model.pipeline_tag is not None: r.set(model.pipeline_tag, "Pipeline", "markdown", 11, md_lb(model.pipeline_tag, 15))
-            if model.last_modified is not None: r.set(model.last_modified, "LastMod.", "date", 12)
-            if model.likes is not None: r.set(model.likes, "Likes", "number", 13)
-            if model.downloads is not None: r.set(model.downloads, "DLs", "number", 14)
-            if model.downloads_all_time is not None: r.set(model.downloads_all_time, "AllDLs", "number", 15)
-            r.next()
-            i += 1
-        return r.get_gr_df(), r.get_gr_hide_item(), r
+        r.search(repo_types, sort, sort_method, filter_str, search_str, author, tags, infer, gated, appr, size_categories,
+                 limit, hardware, stage, fetch_detail, show_labels)
+        return r.get_gr_df(), r.get_gr_hide_labels(), r
     except Exception as e:
        raise gr.Error(e)
 
-def update_df(hide_item: list, filter_item1: str, filter1: str, r: HFSearchResult):
-    r.set_hide(hide_item)
+def update_df(hide_labels: list, filter_item1: str, filter1: str, r: HFSearchResult):
+    r.set_hide(hide_labels)
     r.set_filter(filter_item1, filter1)
     return r.get_gr_df(), r
 
subtags.json ADDED
The diff for this file is too large to render. See raw diff
 
tags.json ADDED
The diff for this file is too large to render. See raw diff