File size: 3,337 Bytes
3531f81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from pathlib import Path

import gradio as gr

from app_1M_image import get_demo as get_demo_1M_image
from app_image import get_demo as get_demo_image
from app_json import get_demo as get_demo_json
from huggingface_hub import logging


# Turn on debug-level verbosity for huggingface_hub's logging. Note that
# `logging` here is the helper module imported from `huggingface_hub`, not the
# standard-library `logging` module.
logging.set_verbosity_debug()


def _get_demo_code(path: str) -> str:
    """Return the source of a demo module rewritten as a standalone script.

    Every occurrence of the ``def get_demo():`` header in the file is replaced
    by ``with gr.Blocks() as demo:`` and a trailing ``demo.launch()`` call is
    appended, so the snippet displayed to the user reads like a runnable app
    rather than a function meant to be embedded in another one.

    Args:
        path: Path to the Python source file to load. A relative path is
            resolved against the current working directory.

    Returns:
        The transformed source code as a single string.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 (the default encoding of Python source files)
    # instead of relying on the platform's locale-dependent default.
    code = Path(path).read_text(encoding="utf-8")
    code = code.replace("def get_demo():", "with gr.Blocks() as demo:")
    return code + "\n\ndemo.launch()"


# Introductory text (an HTML <h1> title followed by Markdown) rendered with
# gr.Markdown at the very top of the page, above the tabs.
DEMO_EXPLANATION = """
<h1 style='text-align: center; margin-bottom: 1rem'> How to persist data from a Space to a Dataset? </h1>

This demo shows how to leverage both `gradio` and `huggingface_hub` to save data from a Space to a Dataset on the Hub.
When doing so, a few things must be taken care of: file formats, concurrent writes, name collision, number of commits,
number of files,... The tabs below shows different ways of implementing a "save to dataset" feature. Depending on the
complexity and usage of your app, you might want to use one or the other.

This Space demo comes as a pair with this guide. If you need more technical details, please refer to it.
"""

# Markdown shown at the top of the "JSON Dataset" tab. It ends with a
# "## Demo" heading because the live demo components are rendered right
# after it.
JSON_DEMO_EXPLANATION = """
## Use case

- Save inputs and outputs
- Build an annotation platform

## Data

Json-able only: text and numeric but no binaries.

## Robustness

Works with concurrent users and replicas.

## Limitations

if you expect millions of lines, you will need to split the local JSON file into multiple files to avoid getting your file tracked as LFS (5MB) on the Hub.

## Demo
"""

# Markdown shown at the top of the "Image Dataset" tab. It ends with a
# "## Demo" heading because the live demo components are rendered right
# after it.
IMAGE_DEMO_EXPLANATION = """
## Use case

Save images with metadata (caption, parameters, datetime,...).

## Robustness

Works with concurrent users and replicas.

## Limitations

  - only 10k images/folder supported on the Hub. If you expect more usage, you must save data in subfolders.
  - only 1M images/repo supported on the Hub. If you expect more usage, you can zip your data before upload. See the _1M images Dataset_ demo.

## Demo
"""

# Markdown shown at the top of the "1M images Dataset" tab. It ends with a
# "## Demo" heading because the live demo components are rendered right
# after it.
IMAGE_1M_DEMO_EXPLANATION = """
## Use case:

Same as _Image Dataset_ example, but with very high usage expected.

## Robustness

Works with concurrent users and replicas.

## Limitations

None.

## Demo
"""

# Build the page: a shared introduction followed by one tab per persistence
# strategy. Every tab has the same layout (explanation, live demo, link to the
# resulting dataset, then the demo's source code), so the tabs are described
# as data and rendered by a single loop, in the order listed.
with gr.Blocks() as demo:
    gr.Markdown(DEMO_EXPLANATION)

    for tab_title, explanation, build_demo, result_markdown, source_file in (
        (
            "JSON Dataset",
            JSON_DEMO_EXPLANATION,
            get_demo_json,
            "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-commit-scheduler-json\n\n## Code",
            "app_json.py",
        ),
        (
            "Image Dataset",
            IMAGE_DEMO_EXPLANATION,
            get_demo_image,
            "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-commit-scheduler-image\n\n## Code",
            "app_image.py",
        ),
        (
            "1M images Dataset",
            IMAGE_1M_DEMO_EXPLANATION,
            get_demo_1M_image,
            "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-commit-scheduler-image-zip\n\n## Code",
            "app_1M_image.py",
        ),
    ):
        with gr.Tab(tab_title):
            gr.Markdown(explanation)
            # Instantiate the demo's components inside the current tab.
            build_demo()
            gr.Markdown(result_markdown)
            with gr.Accordion("Source code", open=True):
                gr.Code(_get_demo_code(source_file), language="python")

# Start the app as soon as the module is executed.
demo.launch()