Spaces:

John6666
/

testwarm

Running

App Files Files Community

John6666 committed on Jan 4

Commit

a8f6f3c

verified ·

1 Parent(s): 2303139

Upload 8 files

Browse files

Files changed (6) hide show

README.md +2 -2
app.py +81 -18
hfconstants.py +7 -0
hfsearch.py +263 -68
subtags.json +0 -0
tags.json +0 -0

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-title: test warm models
-emoji: 🙄
 colorFrom: indigo
 colorTo: purple
 sdk: gradio

 ---
+title: Search HF🤗 Inference API warm models
+emoji: 🤗🔍
 colorFrom: indigo
 colorTo: purple
 sdk: gradio

app.py CHANGED Viewed

@@ -1,36 +1,99 @@
 import spaces
 import gradio as gr
-from hfsearch import HFSearchResult, search, update_filter, update_df
-with gr.Blocks(fill_width=True) as demo:
     with gr.Column():
         search_result = gr.State(value=HFSearchResult())
-        with gr.Group():
-            with gr.Row(equal_height=True):
-                infer_status = gr.Radio(label="Inference status", choices=["warm", "cold", "frozen", "all"], value="warm")
-                gated_status = gr.Radio(label="Gated status", choices=["gated", "non-gated", "all"], value="non-gated")
-                appr_status = gr.CheckboxGroup(label="Approval method", choices=["auto", "manual"], value=["auto", "manual"])
-            with gr.Accordion("Advanced", open=False):
                 with gr.Row(equal_height=True):
-                    filter = gr.Textbox(label="Query", value="")
-                    author = gr.Textbox(label="Author", value="")
-                    sort = gr.Radio(label="Sort", choices=["last_modified", "likes", "downloads"], value="likes")
-                    sort_method = gr.Radio(label="Sort method", choices=["ascending order", "descending order"], value="ascending order")
-                    limit = gr.Number(label="Limit", info="If 0, fetches all models", value=1000, step=1, minimum=0, maximum=10000000)
-        run_button = gr.Button("Search", variant="primary")
         with gr.Group():
             with gr.Accordion("Filter", open=False):
-                hide_item = gr.CheckboxGroup(label="Hide items", choices=[], value=[], visible=False)
                 with gr.Row(equal_height=True):
                     filter_item1 = gr.Dropdown(label="Filter item", choices=[""], value="", visible=False)
                     filter1 = gr.Dropdown(label="Filter", choices=[""], value="", allow_custom_value=True, visible=False)
                     filter_btn = gr.Button("Apply filter", variant="secondary", visible=False)
-            result_df = gr.DataFrame(label="Results", type="array", value=[[]], interactive=False)
-    run_button.click(search, [sort, sort_method, filter, author, infer_status, gated_status, appr_status, limit, search_result], [result_df, hide_item, search_result])\
     .success(update_filter, [filter_item1, search_result], [filter_item1, filter1, filter_btn, search_result], queue=False)
-    gr.on(triggers=[hide_item.change, filter_btn.click], fn=update_df, inputs=[hide_item, filter_item1, filter1, search_result],
           outputs=[result_df, search_result], trigger_mode="once", queue=False, show_api=False)
     filter_item1.change(update_filter, [filter_item1, search_result], [filter_item1, filter1, filter_btn, search_result], queue=False, show_api=False)
 demo.queue().launch()

 import spaces
 import gradio as gr
+from hfsearch import (HFSearchResult, search, update_filter, update_df, get_labels, get_valid_labels,
+                      get_tags, get_subtag_categories, update_subtag_items, update_tags, update_subtags,
+                      DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES)
+CSS = """
+.title { align-items: center; text-align: center; }
+.info { align-items: center; text-align: center; }
+"""
+with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
+    gr.Markdown("# Search Hugging Face🤗", elem_classes="title")
     with gr.Column():
         search_result = gr.State(value=HFSearchResult())
+        with gr.Tab("Normal Search"):
+            with gr.Group():
                 with gr.Row(equal_height=True):
+                    repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space"], value=["model", "dataset", "space"])
+                with gr.Accordion("Advanced", open=False):
+                    with gr.Row(equal_height=True):
+                        filter_str = gr.Textbox(label="Filter", info="String(s) to filter repos", value="")
+                        search_str = gr.Textbox(label="Search", info="A string that will be contained in the returned repo ids", placeholder="bert", value="", lines=1)
+                        author = gr.Textbox(label="Author", info="The author (user or organization)", value="", lines=1)
+                    with gr.Column():
+                        tags = gr.Textbox(label="Tags", info="Tag(s) to filter repos", value="")
+                        with gr.Accordion("Tag input assistance", open=False):
+                            with gr.Row(equal_height=True):
+                                tag_item = gr.Dropdown(label="Item", choices=get_tags(), value=get_tags()[0], allow_custom_value=True, scale=4)
+                                tag_btn = gr.Button("Add", scale=1)
+                            with gr.Row(equal_height=True):
+                                subtag_cat = gr.Dropdown(label="Category", choices=get_subtag_categories(), value=get_subtag_categories()[0], scale=2)
+                                subtag_item = gr.Dropdown(label="Item", choices=[""], value="", allow_custom_value=True, scale=2)
+                                subtug_btn = gr.Button("Add", scale=1)
+                    with gr.Column():
+                        gated_status = gr.Radio(label="Gated status", choices=["gated", "non-gated", "all"], value="all")
+                        appr_status = gr.CheckboxGroup(label="Approval method", choices=["auto", "manual"], value=["auto", "manual"])
+                    with gr.Tab("for Models"):
+                        with gr.Column():
+                            infer_status = gr.Radio(label="Inference status", choices=["warm", "cold", "frozen", "all"], value="all")
+                            gr.Markdown("[About the Inference API status (Warm, Cold, Frozen)](https://huggingface.co/docs/api-inference/supported-models)", elem_classes="info")
+                    #    with gr.Row(equal_height=True):
+                    #        model_task = gr.Textbox(label="Task", info="String(s) of tasks models were designed for", placeholder="fill-mask", value="")
+                    #        trained_dataset = gr.Textbox(label="Trained dataset", info="Trained dataset for a model", value="")
+                    with gr.Tab("for Datasets"):
+                        size_categories = gr.CheckboxGroup(label="Size categories", info="The size of the dataset", choices=DS_SIZE_CATEGORIES, value=[])
+                    #    task_categories = gr.Textbox(label="Task categories", info="Identify datasets by the designed task", value="")
+                    #    task_ids = gr.Textbox(label="Task IDs", info="Identify datasets by the specific task", value="")
+                    #    language_creators = gr.Textbox(label="Language creators", info="Identify datasets with how the data was curated", value="")
+                    #    language = gr.Textbox(label="Language", info="String(s) representing two-character language to filter datasets by", value="")
+                    #    multilinguality = gr.Textbox(label="Multilinguality", info="String(s) representing a filter for datasets that contain multiple languages", value="")
+                    with gr.Tab("for Spaces"):
+                        with gr.Row(equal_height=True):
+                            hardware = gr.CheckboxGroup(label="Specify hardware", choices=SPACE_HARDWARES, value=[])
+                            stage = gr.CheckboxGroup(label="Specify stage", choices=SPACE_STAGES, value=[])
+                    with gr.Row(equal_height=True):
+                        sort = gr.Radio(label="Sort", choices=["last_modified", "likes", "downloads", "trending_score"], value="likes")
+                        sort_method = gr.Radio(label="Sort method", choices=["ascending order", "descending order"], value="ascending order")
+                        limit = gr.Number(label="Limit", info="If 0, fetches all models", value=1000, step=1, minimum=0, maximum=10000000)
+                        fetch_detail = gr.CheckboxGroup(label="Fetch detail", choices=["Space Runtime"], value=["Space Runtime"])
+                    with gr.Row(equal_height=True):
+                        show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
+                run_button = gr.Button("Search", variant="primary")
+        with gr.Tab("Find Serverless Inference API enabled models"):
+            with gr.Group():
+                with gr.Row(equal_height=True):
+                    infer_repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space"], value=["model"], visible=False)
+                    with gr.Column():
+                        infer_infer_status = gr.Radio(label="Inference status", choices=["warm", "cold", "frozen", "all"], value="warm")
+                        gr.Markdown("[About the Inference API status (Warm, Cold, Frozen)](https://huggingface.co/docs/api-inference/supported-models)", elem_classes="info")
+                    with gr.Column():
+                        infer_gated_status = gr.Radio(label="Gated status", choices=["gated", "non-gated", "all"], value="all")
+                        infer_appr_status = gr.CheckboxGroup(label="Approval method", choices=["auto", "manual"], value=["auto", "manual"])
+                infer_run_button = gr.Button("Search", variant="primary")
         with gr.Group():
             with gr.Accordion("Filter", open=False):
+                hide_labels = gr.CheckboxGroup(label="Hide items", choices=[], value=[], visible=False)
                 with gr.Row(equal_height=True):
                     filter_item1 = gr.Dropdown(label="Filter item", choices=[""], value="", visible=False)
                     filter1 = gr.Dropdown(label="Filter", choices=[""], value="", allow_custom_value=True, visible=False)
                     filter_btn = gr.Button("Apply filter", variant="secondary", visible=False)
+            result_df = gr.DataFrame(label="Results", type="pandas", value=None, interactive=False)
+    run_button.click(search, [repo_types, sort, sort_method, filter_str, search_str, author, tags, infer_status, gated_status, appr_status,
+                              size_categories, limit, hardware, stage, fetch_detail, show_labels, search_result],
+                     [result_df, hide_labels, search_result])\
+    .success(update_filter, [filter_item1, search_result], [filter_item1, filter1, filter_btn, search_result], queue=False)
+    infer_run_button.click(search, [infer_repo_types, sort, sort_method, filter_str, search_str, author, tags, infer_infer_status, infer_gated_status, infer_appr_status,
+                                    size_categories, limit, hardware, stage, fetch_detail, show_labels, search_result],
+                           [result_df, hide_labels, search_result])\
     .success(update_filter, [filter_item1, search_result], [filter_item1, filter1, filter_btn, search_result], queue=False)
+    gr.on(triggers=[hide_labels.change, filter_btn.click], fn=update_df, inputs=[hide_labels, filter_item1, filter1, search_result],
           outputs=[result_df, search_result], trigger_mode="once", queue=False, show_api=False)
     filter_item1.change(update_filter, [filter_item1, search_result], [filter_item1, filter1, filter_btn, search_result], queue=False, show_api=False)
+    subtag_cat.change(update_subtag_items, [subtag_cat], [subtag_item], queue=False, show_api=False)
+    subtug_btn.click(update_subtags, [tags, subtag_cat, subtag_item], [tags], queue=False, show_api=False)
+    tag_btn.click(update_tags, [tags, tag_item], [tags], queue=False, show_api=False)
 demo.queue().launch()

hfconstants.py ADDED Viewed

	@@ -0,0 +1,7 @@

+DS_SIZE_CATEGORIES = ["n<1K", "1K<n<10K", "10K<n<100K", "100K<n<1M", "1M<n<10M", "10M<n<100M",
+    "100M<n<1B", "1B<n<10B", "10B<n<100B", "100B<n<1T", "n>1T"]
+SPACE_HARDWARES = ["cpu-basic", "zero-a10g", "cpu-upgrade", "t4-small", "l4x1", "a10g-large", "l40sx1", "a10g-small", "t4-medium", "cpu-xl", "a100-large"]
+SPACE_STAGES = ["RUNNING", "SLEEPING", "RUNTIME_ERROR", "PAUSED", "BUILD_ERROR", "CONFIG_ERROR", "BUILDING", "APP_STARTING", "RUNNING_APP_STARTING"]

hfsearch.py CHANGED Viewed

@@ -1,7 +1,82 @@
 import spaces
 import gradio as gr
-from huggingface_hub import HfApi
 import gc
 class Labels():
     VALID_DTYPE = ["str", "number", "bool", "date", "markdown"]
@@ -9,65 +84,210 @@ class Labels():
     def __init__(self):
         self.types = {}
         self.orders = {}
-    def set(self, label: str, type: str="str", order: int=255):
-        if type not in self.VALID_DTYPE: raise Exception(f"Invalid data type: {type}")
-        self.types[label] = type
-        self.orders[label] = order
     def get(self):
         labels = list(self.types.keys())
         labels.sort(key=lambda x: self.orders[x])
         label_types = [self.types[s] for s in labels]
         return labels, label_types
     def get_null_value(self, type: str):
         if type == "bool": return False
         elif type == "number" or type == "date": return 0
         else: return "None"
 class HFSearchResult():
     def __init__(self):
         self.labels = Labels()
         self.current_item = {}
-        self.current_show_item = {}
         self.item_list = []
-        self.show_item_list = []
         self.item_hide_flags = []
-        self.hide_item = []
         self.filter_items = None
         self.filters = None
         gc.collect()
     def reset(self):
         self.__init__()
-    def set(self, data, label: str, type: str="str", order: int=255, show_data=None):
-        self.labels.set(label, type, order)
         self.current_item[label] = data
-        if show_data is not None: self.current_show_item[label] = show_data
-    def next(self):
         self.item_list.append(self.current_item.copy())
         self.current_item = {}
-        self.show_item_list.append(self.current_show_item.copy())
-        self.current_show_item = {}
-    def get(self):
-        labels, label_types = self.labels.get()
-        df = [[item.get(l, self.labels.get_null_value(t)) for l, t in zip(labels, label_types)] for item in self.item_list]
-        return df, labels, label_types
-    def get_show(self):
         labels, label_types = self.labels.get()
         self._do_filter()
-        df = [[show_item.get(l, self.labels.get_null_value(t)) if l in show_item.keys() else item.get(l, self.labels.get_null_value(t)) for l, t in zip(labels, label_types) if l not in set(self.hide_item)] for item, show_item, is_hide in zip(self.item_list, self.show_item_list, self.item_hide_flags) if not is_hide]
-        show_label_types = [t for l, t in zip(labels, label_types) if l not in self.hide_item]
-        show_labels = [l for l in labels if l not in self.hide_item]
         return df, show_labels, show_label_types
-    def set_hide(self, hide_item: list):
-        self.hide_item = hide_item
     def set_filter(self, filter_item1: str, filter1: str):
         if not filter_item1 and not filter1:
@@ -100,11 +320,17 @@ class HFSearchResult():
             flags.append(flag)
         self.item_hide_flags = flags
     def get_gr_df(self):
-        df, labels, label_types = self.get_show()
-        return gr.update(type="array", value=df, headers=labels, datatype=label_types)
-    def get_gr_hide_item(self):
         return gr.update(choices=self.labels.get()[0], value=[], visible=True)
     def get_gr_filter_item(self, filter_item: str=""):
@@ -124,48 +350,17 @@ class HFSearchResult():
             else: d[v] = 1
         return gr.update(choices=[""] + [t[0] for t in sorted(d.items(), key=lambda x : x[1])][:100], value="", visible=True)
-def md_lb(s: str, count: int):
-    return "<br>".join([s[i:i+count] for i in range(0, len(s), count)])
-# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api
-# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api#huggingface_hub.ModelInfo
-@spaces.GPU
-def search(sort: str, sort_method: str, filter: str, author: str, infer: str, gated: str, appr: list[str], limit: int, r: HFSearchResult):
     try:
-        api = HfApi()
-        kwargs = {}
-        if filter: kwargs["filter"] = filter
-        if author: kwargs["author"] = author
-        if gated == "gated": kwargs["gated"] = True
-        elif gated == "non-gated": kwargs["gated"] = False
-        if infer != "all": kwargs["inference"] = infer
-        if sort_method == "descending order": kwargs["direction"] = -1
-        if limit > 0: kwargs["limit"] = limit
-        models = api.list_models(sort=sort, cardData=True, full=True, **kwargs)
-        r.reset()
-        i = 1
-        for model in models:
-            if model.gated is not None and model.gated and model.gated not in appr: continue
-            r.set(i, "No.", "number", 0)
-            r.set(model.id, "Model", "markdown", 2, f"[{md_lb(model.id, 48)}](https://hf.co/{model.id})")
-            if model.inference is not None: r.set(model.inference, "Status", "markdown", 4, md_lb(model.inference, 8))
-            #if infer != "all": r.set(infer, "Status", "markdown", 4)
-            if model.gated is not None: r.set(model.gated if model.gated else "off", "Gated", "str", 6)
-            #if gated != "all": r.set("on" if gated == "gated" else "off", "Gated", "str", 6)
-            if model.library_name is not None: r.set(model.library_name, "Library", "markdown", 10, md_lb(model.library_name, 12))
-            if model.pipeline_tag is not None: r.set(model.pipeline_tag, "Pipeline", "markdown", 11, md_lb(model.pipeline_tag, 15))
-            if model.last_modified is not None: r.set(model.last_modified, "LastMod.", "date", 12)
-            if model.likes is not None: r.set(model.likes, "Likes", "number", 13)
-            if model.downloads is not None: r.set(model.downloads, "DLs", "number", 14)
-            if model.downloads_all_time is not None: r.set(model.downloads_all_time, "AllDLs", "number", 15)
-            r.next()
-            i += 1
-        return r.get_gr_df(), r.get_gr_hide_item(), r
     except Exception as e:
         raise gr.Error(e)
-def update_df(hide_item: list, filter_item1: str, filter1: str, r: HFSearchResult):
-    r.set_hide(hide_item)
     r.set_filter(filter_item1, filter1)
     return r.get_gr_df(), r

 import spaces
 import gradio as gr
+from huggingface_hub import HfApi, ModelInfo, DatasetInfo, SpaceInfo
+from typing import Union
 import gc
+import pandas as pd
+import datetime
+import json
+import re
+from hfconstants import DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES
+@spaces.GPU
+def dummy_gpu():  # no-op; NOTE(review): presumably kept only so the Space still declares a @spaces.GPU function - confirm
+    pass
+RESULT_ITEMS = {  # column label -> [sort order, data type, shown by default, optional column width (Labels.set falls back to "10%")]
+    "Type": [1, "str", True],
+    "ID": [2, "markdown", True, "40%"],
+    "Status": [4, "markdown", True],
+    "Gated": [6, "str", True],
+    "Likes": [10, "number", True],
+    "DLs": [12, "number", True],
+    "AllDLs": [13, "number", False],
+    "Trending": [16, "number", True],
+    "LastMod.": [17, "str", True],
+    "Library": [20, "markdown", False],
+    "Pipeline": [21, "markdown", True],
+    "Hardware": [25, "str", False],
+    "Stage": [26, "str", False],
+    "NFAA": [40, "str", False],
+}
+try:
+    with open("tags.json", encoding="utf-8") as f:
+        TAGS = json.load(f)
+    with open("subtags.json", encoding="utf-8") as f:
+        SUBTAGS = json.load(f)
+except Exception as e:
+    TAGS = []
+    SUBTAGS = {}
+    print(e)
+def get_tags():
+    return TAGS[0:1000]
+def get_subtag_categories():
+    return list(SUBTAGS.keys())
+def update_subtag_items(category: str):
+    choices=[""] + list(SUBTAGS.get(category, []))
+    return gr.update(choices=choices, value=choices[0])
+def update_subtags(tags: str, category: str, item: str):
+    addtag = f"{category}:{item}" if item else ""
+    newtags = f"{tags}\n{addtag}" if tags else addtag
+    return newtags
+def update_tags(tags: str, item: str):
+    newtags = f"{tags}\n{item}" if tags else item
+    return newtags
+def str_to_list(s: str):
+    try:
+        m = re.split("\n", s)
+        return [s.strip() for s in list(m)]
+    except Exception:
+        return []
+def is_valid_arg(s: str):
+    return len(str_to_list(s)) > 0
+def get_labels():
+    return list(RESULT_ITEMS.keys())
+def get_valid_labels():
+    return [k for k in list(RESULT_ITEMS.keys()) if RESULT_ITEMS[k][2]]
+def date_to_str(dt: datetime.datetime):
+    return dt.strftime('%Y-%m-%d %H:%M')
 class Labels():
     VALID_DTYPE = ["str", "number", "bool", "date", "markdown"]
     def __init__(self):
         self.types = {}
         self.orders = {}
+        self.widths = {}
+    def set(self, label: str):
+        if not label in RESULT_ITEMS.keys(): raise Exception(f"Invalid item: {label}")
+        item = RESULT_ITEMS.get(label)
+        if item[1] not in self.VALID_DTYPE: raise Exception(f"Invalid data type: {type}")
+        self.types[label] = item[1]
+        self.orders[label] = item[0]
+        if len(item) > 3: self.widths[label] = item[3]
+        else: self.widths[label] = "10%"
     def get(self):
         labels = list(self.types.keys())
         labels.sort(key=lambda x: self.orders[x])
         label_types = [self.types[s] for s in labels]
         return labels, label_types
+    def get_widths(self):
+        labels = list(self.types.keys())
+        label_widths = [self.widths[s] for s in labels]
+        return label_widths
     def get_null_value(self, type: str):
         if type == "bool": return False
         elif type == "number" or type == "date": return 0
         else: return "None"
+# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api
+# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api#huggingface_hub.ModelInfo
 class HFSearchResult():
     def __init__(self):
         self.labels = Labels()
         self.current_item = {}
+        self.current_item_info = None
         self.item_list = []
+        self.item_info_list = []
         self.item_hide_flags = []
+        self.hide_labels = []
+        self.show_labels = []
         self.filter_items = None
         self.filters = None
         gc.collect()
     def reset(self):
         self.__init__()
+    def _set(self, data, label: str):
+        self.labels.set(label)
         self.current_item[label] = data
+    def _next(self):
         self.item_list.append(self.current_item.copy())
         self.current_item = {}
+        self.item_info_list.append(self.current_item_info)
+        self.current_item_info = None
+        self.item_hide_flags.append(False)
+    def add_item(self, i: Union[ModelInfo, DatasetInfo, SpaceInfo]):
+        self.current_item_info = i
+        if isinstance(i, ModelInfo): type = "model"
+        elif isinstance(i, DatasetInfo): type = "dataset"
+        elif isinstance(i, SpaceInfo): type = "space"
+        else: return
+        self._set(type, "Type")
+        self._set(i.id, "ID")
+        if i.likes is not None: self._set(i.likes, "Likes")
+        if i.last_modified is not None: self._set(date_to_str(i.last_modified), "LastMod.")
+        if i.trending_score is not None: self._set(int(i.trending_score), "Trending")
+        if i.tags is not None: self._set("True" if "not-for-all-audiences" in i.tags else "False", "NFAA")
+        if type in ["model", "dataset"]:
+            if i.gated is not None: self._set(i.gated if i.gated else "off", "Gated")
+            if i.downloads is not None: self._set(i.downloads, "DLs")
+            if i.downloads_all_time is not None: self._set(i.downloads_all_time, "AllDLs")
+        if type == "model":
+            if i.inference is not None: self._set(i.inference, "Status")
+            if i.library_name is not None: self._set(i.library_name, "Library")
+            if i.pipeline_tag is not None: self._set(i.pipeline_tag, "Pipeline")
+        if type == "space":
+            if i.runtime is not None:
+                self._set(i.runtime.hardware, "Hardware")
+                self._set(i.runtime.stage, "Stage")
+        self._next()
+    def search(self, repo_types: list, sort: str, sort_method: str, filter_str: str, search_str: str, author: str, tags: str, infer: str, gated: str, appr: list[str],
+               size_categories: list, limit: int, hardware: list, stage: list, fetch_detail: list, show_labels: list):  # lists each selected repo type via HfApi and stores the matching rows in this object
+        try:
+            self.reset()  # drop the rows of any previous search
+            self.show_labels = show_labels.copy()
+            api = HfApi()
+            kwargs = {}  # arguments passed to every list_* call below
+            mkwargs = {}  # extra arguments for list_models only
+            dkwargs = {}  # extra arguments for list_datasets only
+            skwargs = {}  # extra arguments for list_spaces only; nothing in this method fills it
+            if filter_str: kwargs["filter"] = str_to_list(filter_str)  # one filter entry per line of text
+            if search_str: kwargs["search"] = search_str
+            if author: kwargs["author"] = author
+            if tags and is_valid_arg(tags):
+                mkwargs["tags"] = str_to_list(tags)
+                dkwargs["tags"] = str_to_list(tags)
+            if limit > 0: kwargs["limit"] = limit  # a limit of 0 leaves the argument out; the limit is passed to each list_* call separately
+            if sort_method == "descending order": kwargs["direction"] = -1
+            if gated == "gated":
+                mkwargs["gated"] = True
+                dkwargs["gated"] = True
+            elif gated == "non-gated":
+                mkwargs["gated"] = False
+                dkwargs["gated"] = False
+            mkwargs["sort"] = sort  # only list_models receives the sort key; the combined rows are sorted locally at the end
+            if len(size_categories) > 0: dkwargs["size_categories"] = size_categories
+            if infer != "all": mkwargs["inference"] = infer
+            if "model" in repo_types:
+                models = api.list_models(full=True, cardData=True, **kwargs, **mkwargs)
+                for model in models:
+                    if model.gated is not None and model.gated and model.gated not in appr: continue  # skip gated repos whose gated value is not among the selected approval methods
+                    self.add_item(model)
+            if "dataset" in repo_types:
+                datasets = api.list_datasets(full=True, **kwargs, **dkwargs)
+                for dataset in datasets:
+                    if dataset.gated is not None and dataset.gated and dataset.gated not in appr: continue
+                    self.add_item(dataset)
+            if "space" in repo_types:
+                if "Space Runtime" in fetch_detail:  # request an explicit expand list that includes "runtime"
+                    spaces = api.list_spaces(expand=["cardData", "datasets", "disabled", "lastModified", "createdAt",
+                                                     "likes", "models", "private", "runtime", "sdk", "sha", "tags", "trendingScore"], **kwargs, **skwargs)
+                else: spaces = api.list_spaces(full=True, **kwargs, **skwargs)  # NOTE(review): the local name `spaces` shadows the imported `spaces` module inside this method
+                for space in spaces:
+                    if space.gated is not None and space.gated and space.gated not in appr: continue
+                    if space.runtime is not None:  # hardware/stage filters are skipped when no runtime info is present
+                         if len(hardware) > 0 and space.runtime.stage == "RUNNING" and space.runtime.hardware not in hardware: continue  # hardware filter only applies to spaces whose stage is RUNNING
+                         if len(stage) > 0 and space.runtime.stage not in stage: continue
+                    self.add_item(space)
+            if sort == "downloads" and ("space" not in repo_types): self.sort("DLs")  # add_item sets no download columns for spaces
+            elif sort == "downloads_all_time" and ("space" not in repo_types): self.sort("AllDLs")
+            elif sort == "likes": self.sort("Likes")
+            elif sort == "trending_score": self.sort("Trending")
+            else: self.sort("LastMod.")  # every other case, including a download sort while spaces are selected
+        except Exception as e:
+            raise Exception(f"Search error: {e}") from e  # also wraps the "No item found." error raised by sort()
+    def get(self):  # returns (styled data, shown labels, shown label types)
         labels, label_types = self.labels.get()
         self._do_filter()
+        dflist = [[item.get(l, self.labels.get_null_value(t)) for l, t in zip(labels, label_types)] for item, is_hide in zip(self.item_list, self.item_hide_flags) if not is_hide]  # one row per non-hidden item; missing cells get the null value of the column type
+        df = self._to_pandas(dflist, labels)  # the result of .style.apply(...), with hidden columns already dropped
+        show_label_types = [t for l, t in zip(labels, label_types) if l not in self.hide_labels and l in self.show_labels]
+        show_labels = [l for l in labels if l not in self.hide_labels and l in self.show_labels]  # same condition as above, so labels and types stay aligned
         return df, show_labels, show_label_types
+    def _to_pandas(self, dflist: list, labels: list):  # builds the frame from the rows, drops hidden columns and attaches per-cell CSS styles
+        # https://pandas.pydata.org/docs/reference/api/pandas.io.formats.style.Styler.apply.html
+        # https://stackoverflow.com/questions/41654949/pandas-style-function-to-highlight-specific-columns
+        # https://stackoverflow.com/questions/69832206/pandas-styling-with-conditional-rules
+        # https://stackoverflow.com/questions/41203959/conditionally-format-python-pandas-cell
+        # https://stackoverflow.com/questions/51187868/how-do-i-remove-and-re-sort-reindex-columns-after-applying-style-in-python-pan
+        # https://stackoverflow.com/questions/36921951/truth-value-of-a-series-is-ambiguous-use-a-empty-a-bool-a-item-a-any-o
+        def rank_df(sdf: pd.DataFrame, df: pd.DataFrame, col: str):  # colors the cells of col by quantile
+            ranks = [(0.5, "gold"), (0.75, "orange"), (0.9, "orangered")]  # applied in this order, so a higher threshold overwrites a lower one
+            for t, color in ranks:
+                sdf.loc[df[col] >= df[col].quantile(q=t), [col]] = f'color: {color}'
+            return sdf
+        def highlight_df(x: pd.DataFrame, df: pd.DataFrame):  # x: the frame being styled; df: the reference frame passed via apply(..., df=ref_df)
+            sdf = pd.DataFrame("", index=x.copy().index, columns=x.copy().columns)  # style frame of empty strings with the same index and columns as x
+            columns = df.columns
+            if "Trending" in columns: sdf = rank_df(sdf, df, "Trending")
+            if "Likes" in columns: sdf = rank_df(sdf, df, "Likes")
+            if "AllDLs" in columns: sdf = rank_df(sdf, df, "AllDLs")
+            if "DLs" in columns: sdf = rank_df(sdf, df, "DLs")
+            if "Status" in columns:
+                sdf.loc[df["Status"] == "warm", ["Type"]] = 'color: orange'  # the inference status is shown by coloring the Type cell
+                sdf.loc[df["Status"] == "cold", ["Type"]] = 'color: dodgerblue'
+            if "Gated" in columns:
+                sdf.loc[df["Gated"] == "auto", ["Gated"]] = 'color: dodgerblue'
+                sdf.loc[df["Gated"] == "manual", ["Gated"]] = 'color: crimson'
+            if "Stage" in columns and "Hardware" in columns:
+                sdf.loc[(df["Stage"] == "RUNNING") & (df["Hardware"] != "zero-a10g") & (df["Hardware"] != "cpu-basic") & (df["Hardware"] != "None") & (df["Hardware"]), ["Hardware", "Type"]] = 'color: lime'  # NOTE(review): the last term uses the Hardware column itself as a mask - presumably meant to exclude empty values; confirm
+                sdf.loc[(df["Stage"] == "RUNNING") & (df["Hardware"] == "zero-a10g"), ["Hardware", "Type"]] = 'color: green'
+                sdf.loc[(df["Type"] == "space") & (df["Stage"] != "RUNNING")] = 'opacity: 0.5'  # whole row, replacing any style set on it above
+                sdf.loc[(df["Type"] == "space") & (df["Stage"] != "RUNNING"), ["Type"]] = 'color: crimson'
+                sdf.loc[df["Stage"] == "RUNNING", ["Stage"]] = 'color: lime'
+            if "NFAA" in columns: sdf.loc[df["NFAA"] == "True", ["Type"]] = 'background-color: hotpink'  # NFAA cells hold the strings "True"/"False" (see add_item)
+            show_columns = x.copy().columns
+            style_columns = sdf.columns
+            drop_columns = [c for c in style_columns if c not in show_columns]  # remove style columns that are not part of x
+            sdf = sdf.drop(drop_columns, axis=1)
+            return sdf
+        def id_to_md(df: pd.DataFrame):  # called through apply(axis=1) below, so df here is a single row
+            if df["Type"] == "dataset": return f'[{df["ID"]}](https://hf.co/datasets/{df["ID"]})'
+            elif df["Type"] == "space": return f'[{df["ID"]}](https://hf.co/spaces/{df["ID"]})'
+            else: return f'[{df["ID"]}](https://hf.co/{df["ID"]})'
+        def format_md_df(df: pd.DataFrame):  # replaces the ID column with markdown links, modifying df in place
+            df["ID"] = df.apply(id_to_md, axis=1)
+            return df
+        hide_labels = [l for l in labels if l in self.hide_labels or l not in self.show_labels]  # columns to drop: explicitly hidden or not selected for display
+        df = format_md_df(pd.DataFrame(dflist, columns=labels))
+        ref_df = df.copy()  # keeps all columns so the style rules can read columns that are not displayed
+        df = df.drop(hide_labels, axis=1).style.apply(highlight_df, axis=None, df=ref_df)
+        return df
+    def set_hide(self, hide_labels: list):
+        self.hide_labels = hide_labels.copy()
     def set_filter(self, filter_item1: str, filter1: str):
         if not filter_item1 and not filter1:
             flags.append(flag)
         self.item_hide_flags = flags
+    def sort(self, key="Likes"):
+        if len(self.item_list) == 0: raise Exception("No item found.")
+        if not key in self.labels.get()[0]: key = "Likes"
+        self.item_list, self.item_hide_flags, self.item_info_list = zip(*sorted(zip(self.item_list, self.item_hide_flags, self.item_info_list), key=lambda x: x[0][key], reverse=True))
     def get_gr_df(self):
+        df, labels, label_types = self.get()
+        widths = self.labels.get_widths()
+        return gr.update(type="pandas", value=df, headers=labels, datatype=label_types, column_widths=widths, wrap=True)
+    def get_gr_hide_labels(self):
         return gr.update(choices=self.labels.get()[0], value=[], visible=True)
     def get_gr_filter_item(self, filter_item: str=""):
             else: d[v] = 1
         return gr.update(choices=[""] + [t[0] for t in sorted(d.items(), key=lambda x : x[1])][:100], value="", visible=True)
+def search(repo_types: list, sort: str, sort_method: str, filter_str: str, search_str: str, author: str, tags: str, infer: str,
+           gated: str, appr: list[str], size_categories: list, limit: int, hardware: list, stage: list, fetch_detail: list, show_labels: list, r: HFSearchResult):  # event handler: runs the search on r and returns (table update, hide-items update, r)
     try:
+        r.search(repo_types, sort, sort_method, filter_str, search_str, author, tags, infer, gated, appr, size_categories,
+                 limit, hardware, stage, fetch_detail, show_labels)
+        return r.get_gr_df(), r.get_gr_hide_labels(), r
     except Exception as e:
         raise gr.Error(e)
+def update_df(hide_labels: list, filter_item1: str, filter1: str, r: HFSearchResult):  # event handler: applies the hidden columns and the row filter to r, then returns (table update, r)
+    r.set_hide(hide_labels)
     r.set_filter(filter_item1, filter1)
     return r.get_gr_df(), r

subtags.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tags.json ADDED Viewed

The diff for this file is too large to render. See raw diff