TejAndrewsACC committed on
Commit
9bf7d07
·
verified ·
1 Parent(s): 696e9bb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from huggingface_hub import InferenceClient
from llama_cpp import Llama

# Client used below for streaming chat completions.
client = InferenceClient()

# NOTE(review): `llm` is never referenced anywhere else in this file, yet this
# call downloads a large GGUF model at import time. Kept to preserve the
# module's observable side effects; consider removing if truly unused.
llm = Llama.from_pretrained(
    repo_id="bartowski/Reasoning-Llama-1b-v0.1-GGUF",
    filename="Reasoning-Llama-1b-v0.1-f16.gguf",
)

# Fixed system message prepended to every conversation.
# Fix: corrected misspellings "artifial inteligence" -> "artificial intelligence".
FIXED_SYSTEM_MESSAGE = "You are an artificial intelligence created by the ACC(Algorithmic Computer-generated Consciousness)."
16
def respond(
    message,
    history: list[tuple[str, str]],
    user_system_message,  # user-configurable addition to the fixed system prompt
    max_tokens,
    temperature,
    top_p,
):
    """Stream an assistant reply for *message*, given prior chat *history*.

    Builds an OpenAI-style messages list (fixed system prompt + the user's
    extra system text, then alternating history turns, then the new message)
    and yields the growing response string as tokens stream in.

    Yields:
        str: the accumulated response text after each received token.
    """
    # The fixed prompt always comes first; the user's text is appended to it.
    combined_system_message = f"{FIXED_SYSTEM_MESSAGE} {user_system_message}"

    messages = [{"role": "system", "content": combined_system_message}]

    # History arrives as (user_text, assistant_text) pairs; skip empty halves.
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # Fix 1: the original looped `for message in ...`, shadowing the `message`
    # parameter with each stream chunk. Renamed the loop variable to `chunk`.
    # Fix 2: the final/stop chunk's delta may carry no 'content' value, which
    # would have made `response += token` fail on None; guard with .get().
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk['choices'][0]['delta'].get('content')
        if token:
            response += token
            yield response
51
+
52
# Gradio interface: chat UI wired to `respond`, with extra controls for the
# system message and the generation parameters.
system_box = gr.Textbox(value="", label="System Message")
length_slider = gr.Slider(
    minimum=1, maximum=2048, value=512, step=1, label="Maximum response length"
)
creativity_slider = gr.Slider(
    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Creativity"
)
top_p_slider = gr.Slider(
    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Neural Activity"
)

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        system_box,
        length_slider,
        creativity_slider,
        top_p_slider,
    ],
)

if __name__ == "__main__":
    demo.launch()