<!DOCTYPE html>
<html>
    <head>
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <title>Gradio-Lite: Serverless Gradio Running Entirely in Your Browser</title>
        <meta name="description" content="Gradio-Lite: Serverless Gradio Running Entirely in Your Browser">
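        <!-- @gradio/lite bundles the Gradio frontend together with Pyodide,
             so the Python app below runs entirely in the browser. -->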

        <script type="module" crossorigin src="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.js"></script>
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.css" />

        <style>
            html, body {
                margin: 0;
                padding: 0;
                height: 100%;
            }
        </style>
    </head>
    <body>
        <gradio-lite>
            <gradio-file name="app.py" entrypoint>
import gradio as gr
from transformers_js_py import pipeline
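
# Load the model once at startup. Gradio-Lite runs this script in Pyodide,
# so top-level await is allowed here. "q4" quantization plus the WebGPU
# backend keep the 0.5B model small and fast enough to run client-side
# ("device": "wasm" is the usual fallback where WebGPU is unavailable).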

generator = await pipeline(
    "text-generation",
    "onnx-community/Qwen2.5-0.5B-Instruct",
    { "dtype": "q4", "device": "webgpu" }
)

async def chat_response(message, history):
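    # Note: prior turns in `history` are not forwarded to the model, so each
    # reply is independent. With type="messages", history is already a list of
    # {"role", "content"} dicts, so it could be spliced in before the user turn.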
    messages = [
        { "role": "system", "content": "You are a great assistant." },
        { "role": "user", "content": message }
    ]

    output = await generator(messages, {
        "max_new_tokens": 256,
        "do_sample": True,
        "temperature": 0.3,
    })
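    # The pipeline echoes the whole conversation back; the last message in
    # generated_text is the assistant's newly generated reply.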
    response = output[0]["generated_text"][-1]["content"]
    return response
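
# type="messages" passes history as a list of OpenAI-style message dicts;
# autofocus=False keeps the textbox from grabbing focus on page load.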

demo = gr.ChatInterface(chat_response, type="messages", autofocus=False)

demo.launch()
            </gradio-file>
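
            <!-- Packages listed here are installed in-browser via micropip;
                 transformers-js-py is the Pyodide bridge to Transformers.js. -->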

            <gradio-requirements>
transformers-js-py
            </gradio-requirements>
        </gradio-lite>
    </body>
</html>