OpenSourceRonin committed on
Commit
fef40a8
·
1 Parent(s): e3ec733
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
@@ -10,7 +11,7 @@ from vptq.app_utils import get_chat_loop_generator
10
 
11
  chat_completion = get_chat_loop_generator("VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k32768-0-woft")
12
 
13
-
14
  def respond(
15
  message,
16
  history: list[tuple[str, str]],
 
1
+ import spaces
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
 
 
11
 
12
  chat_completion = get_chat_loop_generator("VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k32768-0-woft")
13
 
14
+ @spaces.GPU
15
  def respond(
16
  message,
17
  history: list[tuple[str, str]],