Spaces:
Running
on
Zero
Running
on
Zero
ehristoforu
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -52,7 +52,7 @@ api.upload_folder(
|
|
52 |
)
|
53 |
'''
|
54 |
|
55 |
-
@spaces.GPU(
|
56 |
def generate(
|
57 |
message: str,
|
58 |
chat_history: list[dict],
|
@@ -68,7 +68,7 @@ def generate(
|
|
68 |
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
|
69 |
input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
|
70 |
gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
|
71 |
-
input_ids = input_ids.to(
|
72 |
|
73 |
streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
|
74 |
generate_kwargs = dict(
|
|
|
52 |
)
|
53 |
'''
|
54 |
|
55 |
+
@spaces.GPU()
|
56 |
def generate(
|
57 |
message: str,
|
58 |
chat_history: list[dict],
|
|
|
68 |
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
|
69 |
input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
|
70 |
gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
|
71 |
+
input_ids = input_ids.to(merged_model.device)
|
72 |
|
73 |
streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
|
74 |
generate_kwargs = dict(
|