Update app.py
app.py
CHANGED
@@ -5,9 +5,8 @@ from awq import AutoAWQForCausalLM
 
 model_path = "bragour/Camel-7b-chat-awq"
 
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-model = AutoAWQForCausalLM.from_quantized(model_path, fuse_layers=True, trust_remote_code=False, safetensors=True)
+model = AutoAWQForCausalLM.from_quantized(model_path, fuse_layers=True, trust_remote_code=False, safetensors=True)
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=False)
 
 
@@ -16,7 +15,7 @@ def respond(
 ):
     formatted_prompt = f"<s>[INST]{message}[/INST]"
 
-    tokens = tokenizer(formatted_prompt, return_tensors='pt').input_ids.to(device)
+    tokens = tokenizer(formatted_prompt, return_tensors='pt').input_ids.cuda()
 
     # Generate the response from the API
     result = model.generate(
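Net effect of the commit: the explicit torch.device dispatch is dropped and the input ids are moved to the GPU unconditionally with .cuda(), so the Space now assumes CUDA is available at runtime. Below is a minimal sketch of the resulting load-and-generate path; since the diff only shows fragments of app.py, the transformers import, the respond() signature, and the generate()/decode parameters are assumptions, not the file's actual code.

# Minimal sketch of app.py after this commit (respond() signature and
# max_new_tokens are hypothetical; only the marked lines appear in the diff).
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "bragour/Camel-7b-chat-awq"

# Load the AWQ-quantized weights; inference now requires a CUDA device.
model = AutoAWQForCausalLM.from_quantized(
    model_path, fuse_layers=True, trust_remote_code=False, safetensors=True
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=False)

def respond(message: str) -> str:
    formatted_prompt = f"<s>[INST]{message}[/INST]"
    # .cuda() raises if no GPU is present, unlike the removed
    # torch.device('cuda' if torch.cuda.is_available() else 'cpu') fallback.
    tokens = tokenizer(formatted_prompt, return_tensors='pt').input_ids.cuda()
    # Generate the response from the API
    result = model.generate(tokens, max_new_tokens=256)  # assumed parameter
    return tokenizer.decode(result[0], skip_special_tokens=True)

One consequence of this design choice: because the CPU fallback is gone, running the Space on CPU-only hardware (for example, while it is sleeping or downgraded) will fail at the .cuda() call rather than degrading to slow CPU generation.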