<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Gradio-Lite: Serverless Gradio Running Entirely in Your Browser</title>
    <meta name="description" content="Gradio-Lite: Serverless Gradio Running Entirely in Your Browser">
    <script type="module" crossorigin src="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.js"></script>
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.css" />
    <style>
      html, body {
        margin: 0;
        padding: 0;
        height: 100%;
      }
    </style>
  </head>
  <body>
    <gradio-lite>
      <gradio-file name="app.py" entrypoint>
import gradio as gr
from transformers_js_py import pipeline
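# Load a text-generation pipeline that runs fully client-side:
# transformers-js-py proxies the Transformers.js API into Pyodide,
# "q4" requests 4-bit quantized weights, and "webgpu" runs inference
# on the GPU. Top-level await is supported in Gradio-Lite app files.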
generator = await pipeline(
    "text-generation",
    "onnx-community/Qwen2.5-0.5B-Instruct",
    {"dtype": "q4", "device": "webgpu"},
)
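# Chat handler called by ChatInterface with the latest user message and
# the conversation history (the history is unused here, so each turn is
# answered independently).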
async def chat_response(message, history):
    messages = [
        {"role": "system", "content": "You are a great assistant."},
        {"role": "user", "content": message},
    ]
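    # Run generation in the browser; the pipeline returns the message list
    # with the assistant's reply appended as the last entry.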
    output = await generator(messages, {
        "max_new_tokens": 256,
        "do_sample": True,
        "temperature": 0.3,
    })
    response = output[0]["generated_text"][-1]["content"]
    return response
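# type="messages" makes ChatInterface pass history as a list of
# {"role": ..., "content": ...} dicts (OpenAI-style messages).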
demo = gr.ChatInterface(chat_response, type="messages", autofocus=False)
demo.launch()
      </gradio-file>
      <gradio-requirements>
        transformers-js-py
      </gradio-requirements>
    </gradio-lite>
  </body>
</html>