zhangtao committed on
Commit 2dc2f8f · 1 Parent(s): d65d1b8
Files changed (3)
  1. Dockerfile +20 -0
  2. app.py +39 -0
  3. requirements.txt +2 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.10
+
+ WORKDIR /code
+
+ RUN wget https://huggingface.co/zhangtao103239/Qwen-1.8B-GGUF/resolve/main/qwen-1.8b-f16.gguf?download=true -O qwen-1.8b-f16.gguf
+
+ RUN wget https://huggingface.co/zhangtao103239/Qwen-1.8B-GGUF/resolve/main/qwen-1.8b-q5_k_m.gguf?download=true -O qwen-1.8b-q5_k_m.gguf
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ COPY . .
+
+ ENV MPLCONFIGDIR /code/matplotlib/
+
+ CMD ["python", "app.py"]
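
The two RUN wget lines bake both GGUF weight files into the image at build time, so the Space boots with the models already on disk. As a hedged alternative sketch (not what this commit does), the same files could be fetched in Python via huggingface_hub's hf_hub_download; the repo_id and filenames are taken from the URLs above, everything else is assumption:

# Hypothetical alternative to the RUN wget lines above (not in this commit):
# pull the same GGUF files from the Hub with huggingface_hub.
from huggingface_hub import hf_hub_download

for name in ("qwen-1.8b-f16.gguf", "qwen-1.8b-q5_k_m.gguf"):
    # Mirrors `wget ... -O <name>`: the file lands in the current directory.
    hf_hub_download(repo_id="zhangtao103239/Qwen-1.8B-GGUF",
                    filename=name, local_dir=".")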
app.py ADDED
@@ -0,0 +1,39 @@
+ import gradio as gr
+ from llama_cpp import Llama
+ llm_f16 = Llama(model_path="./qwen-1.8b-f16.gguf",
+                 n_ctx=4096,
+                 n_threads=2,
+                 chat_format="chatml")
+ llm_q5_k_m = Llama(model_path="./qwen-1.8b-q5_k_m.gguf",
+                    n_ctx=4096,
+                    n_threads=2,
+                    chat_format="chatml")
+
+ def chat_stream_completion(message, history, system_prompt, q5_check):
+     messages_prompts = [{"role": "system", "content": system_prompt}]
+     llm = None
+     if q5_check:
+         llm = llm_q5_k_m
+     else:
+         llm = llm_f16
+
+     for human, assistant in history:
+         messages_prompts.append({"role": "user", "content": human})
+         messages_prompts.append({"role": "assistant", "content": assistant})
+     messages_prompts.append({"role": "user", "content": message})
+     response = llm.create_chat_completion(
+         messages=messages_prompts,
+         stream=True
+     )
+     message_repl = ""
+     for chunk in response:
+         if len(chunk['choices'][0]["delta"]) != 0 and "content" in chunk['choices'][0]["delta"]:
+             message_repl = message_repl + \
+                 chunk['choices'][0]["delta"]["content"]
+         yield message_repl
+
+ gr.ChatInterface(
+     chat_stream_completion,
+     additional_inputs=[gr.Textbox(
+         "You are helpful AI.", label="System Prompt"), gr.Checkbox(label="Use Q5-K-M?", value=True)]
+ ).queue().launch(server_name="0.0.0.0")
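
app.py keeps both llama-cpp-python models resident and streams OpenAI-style completion chunks, where each chunk's "delta" dict may or may not carry a "content" key. A minimal standalone sketch of the same create_chat_completion streaming call outside Gradio (assumes ./qwen-1.8b-q5_k_m.gguf was downloaded as in the Dockerfile):

# Sketch of the streaming call app.py wraps in a Gradio handler.
from llama_cpp import Llama

llm = Llama(model_path="./qwen-1.8b-q5_k_m.gguf",
            n_ctx=4096, n_threads=2, chat_format="chatml")

text = ""
for chunk in llm.create_chat_completion(
        messages=[{"role": "system", "content": "You are helpful AI."},
                  {"role": "user", "content": "Hello!"}],
        stream=True):
    delta = chunk["choices"][0]["delta"]  # may hold "role" or "content"
    if "content" in delta:
        text += delta["content"]
print(text)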
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ llama-cpp-python
+ gradio