Spaces:
Running
on
Zero
Running
on
Zero
ehristoforu
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -52,7 +52,7 @@ api.upload_folder(
|
|
52 |
)
|
53 |
'''
|
54 |
|
55 |
-
@spaces.GPU(
|
56 |
def generate(
|
57 |
message: str,
|
58 |
chat_history: list[dict],
|
@@ -68,7 +68,7 @@ def generate(
|
|
68 |
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
|
69 |
input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
|
70 |
gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
|
71 |
-
input_ids = input_ids.to(
|
72 |
|
73 |
streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
|
74 |
generate_kwargs = dict(
|
|
|
52 |
)
|
53 |
'''
|
54 |
|
55 |
+
@spaces.GPU()
|
56 |
def generate(
|
57 |
message: str,
|
58 |
chat_history: list[dict],
|
|
|
68 |
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
|
69 |
input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
|
70 |
gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
|
71 |
+
input_ids = input_ids.to(merged_model.device)
|
72 |
|
73 |
streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
|
74 |
generate_kwargs = dict(
|