Spaces:

OpenSourceRonin
/

VPTQ-demo

Running on Zero

OpenSourceRonin committed on Oct 4, 2024

Commit

fef40a8

1 Parent(s): e3ec733

gpu

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
@@ -10,7 +11,7 @@ from vptq.app_utils import get_chat_loop_generator
 chat_completion = get_chat_loop_generator("VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k32768-0-woft")
 def respond(
     message,
     history: list[tuple[str, str]],

+import spaces
 import gradio as gr
 from huggingface_hub import InferenceClient
 chat_completion = get_chat_loop_generator("VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k32768-0-woft")
+@spaces.GPU
 def respond(
     message,
     history: list[tuple[str, str]],