Update app.py
app.py CHANGED
@@ -5,7 +5,7 @@ from awq import AutoAWQForCausalLM
 
 model_path = "bragour/Camel-7b-chat-awq"
 
-model
+model = AutoAWQForCausalLM.from_quantized(model_path, fuse_layers=True, trust_remote_code=False, safetensors=True)
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=False)
 
 
@@ -17,8 +17,8 @@ def respond(
 ):
     formatted_prompt = f"<s>[INST]{message}[/INST]"
 
-    tokens = tokenizer(formatted_prompt,return_tensors='pt').input_ids.cuda()
-
+    tokens = tokenizer(formatted_prompt, return_tensors='pt').input_ids.cuda()
+
     # Generate the response from the API
     result = model.generate(
         tokens,
@@ -27,7 +27,7 @@ def respond(
         temperature=temperature,
         top_p=top_p,
     )
-
+
     response = tokenizer.decode(result[0], skip_special_tokens=True)
 
     return response
@@ -38,7 +38,7 @@ demo = gr.Interface(
     inputs="text",
     outputs=["text"]
 )
-
+demo.launch(inline=False)
 
 if __name__ == "__main__":
     demo.launch()
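
For context, below is a sketch of the complete app.py as it stands after this commit, assembled from the hunks above. Only the lines that appear in the diff are verbatim; everything the diff hides (the imports, the rest of the respond() signature, the hidden generate() arguments, and the opening of the gr.Interface call, including fn=respond) is an assumption and is flagged in the comments.

# Sketch of app.py after this commit. Hunk lines are verbatim;
# everything else is an assumption, marked below.
import gradio as gr                      # assumed import
from transformers import AutoTokenizer  # assumed import
from awq import AutoAWQForCausalLM      # shown in the first hunk header

model_path = "bragour/Camel-7b-chat-awq"

# The main fix in this commit: the bare `model` expression, which raised a
# NameError at startup, is replaced with an actual AWQ model load.
model = AutoAWQForCausalLM.from_quantized(model_path, fuse_layers=True, trust_remote_code=False, safetensors=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=False)


def respond(
    message,
    max_tokens=256,    # assumed: this part of the signature is hidden by the diff
    temperature=0.7,   # assumed default
    top_p=0.95,        # assumed default
):
    formatted_prompt = f"<s>[INST]{message}[/INST]"

    # Tokenize the prompt and move it to the GPU (the Space assumes CUDA)
    tokens = tokenizer(formatted_prompt, return_tensors='pt').input_ids.cuda()

    # Generate the response from the API
    result = model.generate(
        tokens,
        max_new_tokens=max_tokens,  # assumed: two lines here are hidden by the diff
        temperature=temperature,
        top_p=top_p,
    )

    # Decode the output, dropping <s>/</s> and other special tokens
    response = tokenizer.decode(result[0], skip_special_tokens=True)

    return response


demo = gr.Interface(
    fn=respond,  # assumed: the gr.Interface opening is hidden by the diff
    inputs="text",
    outputs=["text"]
)
demo.launch(inline=False)  # added in this commit; it runs unconditionally, so
                           # the guarded launch below is redundant as a script

if __name__ == "__main__":
    demo.launch()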