AeternumS committed
Commit 96c3529 · 1 Parent(s): e904228
Files changed (2)
  1. app.py +21 -130
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,136 +1,27 @@
  import streamlit as st
- from llama_cpp_cuda_tensorcores import Llama
- from huggingface_hub import hf_hub_download
- import spaces
-
- # Constants
- REPO_ID = "MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF"
- MODEL_NAME = "Meta-Llama-3-70B-Instruct.Q3_K_L.gguf"
- MAX_CONTEXT_LENGTH = 8192
- CUDA = True
- SYSTEM_PROMPT = "You are a helpful, smart, kind, and efficient AI assistant. You always fulfill the user's requests to the best of your ability."
- TOKEN_STOP = ["<|eot_id|>"]
- SYS_MSG = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nSYSTEM_PROMPT<|eot_id|>\n"
- USER_PROMPT = (
-     "<|start_header_id|>user<|end_header_id|>\n\nUSER_PROMPT<|eot_id|>\n"
- )
- ASSIS_PROMPT = "<|start_header_id|>assistant<|end_header_id|>\n\n"
- END_ASSIS_PREVIOUS_RESPONSE = "<|eot_id|>\n"
-
- TASK_PROMPT = {
-     "Assistant": SYSTEM_PROMPT,
+ import requests
+
+ # API URL and headers
+ API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
+ headers = {
+     "Accept": "application/json",
+     "Authorization": "Bearer hf_XXXXX",  # Replace with your actual token
+     "Content-Type": "application/json"
  }

- # ChatLLM class for handling the chat
- class ChatLLM:
-     def __init__(self, config_model):
-         self.llm = None
-         self.config_model = config_model
-
-     def load_cpp_model(self):
-         self.llm = Llama(**self.config_model)
-
-     def apply_chat_template(self, history, system_message):
-         history = history or []
-         messages = SYS_MSG.replace("SYSTEM_PROMPT", system_message.strip())
-         for msg in history:
-             messages += (
-                 USER_PROMPT.replace("USER_PROMPT", msg[0]) + ASSIS_PROMPT + msg[1]
-             )
-             messages += END_ASSIS_PREVIOUS_RESPONSE if msg[1] else ""
-
-         return messages
-
-     @spaces.GPU(duration=120)
-     def response(
-         self,
-         history,
-         system_message,
-         max_tokens,
-         temperature,
-         top_p,
-         top_k,
-         repeat_penalty,
-     ):
-         messages = self.apply_chat_template(history, system_message)
-
-         history[-1][1] = ""
-
-         if not self.llm:
-             print("Loading model")
-             self.load_cpp_model()
-
-         for output in self.llm(
-             messages,
-             echo=False,
-             stream=True,
-             max_tokens=max_tokens,
-             temperature=temperature,
-             top_p=top_p,
-             top_k=top_k,
-             repeat_penalty=repeat_penalty,
-             stop=TOKEN_STOP,
-         ):
-             answer = output["choices"][0]["text"]
-             history[-1][1] += answer
-
-         return history
-
- # Download model from Hugging Face
- model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_NAME)
-
- # Model configuration
- config_model = {
-     "model_path": model_path,
-     "n_ctx": MAX_CONTEXT_LENGTH,
-     "n_gpu_layers": -1 if CUDA else 0,
- }
-
- # Instantiate the chat model
- llm_chat = ChatLLM(config_model)
-
- # Streamlit UI
- st.title("AI Chat Assistant")
-
- # Initialize session state to store the chat history
- if "chat_history" not in st.session_state:
-     st.session_state.chat_history = []
-
- if "input_text" not in st.session_state:
-     st.session_state.input_text = ""
-
- # Define response area
- def chat_response():
-     if st.session_state.input_text.strip():
-         # User message
-         history = st.session_state.chat_history
-         history.append([st.session_state.input_text, ""])
-
-         # Model response
-         history = llm_chat.response(
-             history=history,
-             system_message=SYSTEM_PROMPT,
-             max_tokens=100,  # Adjust token length as needed
-             temperature=0.7,
-             top_p=0.9,
-             top_k=50,
-             repeat_penalty=1.0,
-         )
-
-         st.session_state.chat_history = history
-         st.session_state.input_text = ""
-
- # Textbox for user input
- st.text_input("You: ", key="input_text", on_change=chat_response)
+ # Function to query the API
+ def query(payload):
+     response = requests.post(API_URL, headers=headers, json=payload)
+     return response.json()

- # Display chat history
- if st.session_state.chat_history:
-     for user_msg, bot_resp in st.session_state.chat_history:
-         st.markdown(f"**You:** {user_msg}")
-         st.markdown(f"**Assistant:** {bot_resp}")
+ # Streamlit app layout
+ st.title("API Query App")
+ st.write("This app allows you to query the API and retrieve responses.")

- # Clear chat button
- def clear_chat():
-     st.session_state.chat_history = []
+ user_input = st.text_input("Enter your input:", "Can you please let us know more details about your ")

- st.button("Clear History", on_click=clear_chat)
+ if st.button("Submit"):
+     with st.spinner("Fetching response..."):
+         output = query({"inputs": user_input, "parameters": {}})
+         st.success("Response received!")
+         st.json(output)  # Display the response in a nice format
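Note: the new app.py hardcodes a placeholder bearer token ("Bearer hf_XXXXX") in source. A minimal sketch of a safer pattern, assuming the token is stored as a Space secret under a hypothetical HF_TOKEN key (st.secrets is Streamlit's built-in secrets accessor):

import streamlit as st

# Read the endpoint token from Streamlit secrets instead of hardcoding it.
# "HF_TOKEN" is a hypothetical secret name; configure it in the Space settings.
headers = {
    "Accept": "application/json",
    "Authorization": f"Bearer {st.secrets['HF_TOKEN']}",
    "Content-Type": "application/json",
}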
requirements.txt ADDED
@@ -0,0 +1 @@
+ requests
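For reference, a minimal standalone sketch of calling the same endpoint outside Streamlit. It assumes the endpoint serves a text-generation model, whose responses typically look like [{"generated_text": "..."}]; the exact shape and accepted parameters (e.g. max_new_tokens) depend on the deployed model and task:

import requests

API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
headers = {
    "Accept": "application/json",
    "Authorization": "Bearer hf_XXXXX",  # Replace with your actual token
    "Content-Type": "application/json",
}

def query(payload):
    # Fail loudly on HTTP errors rather than JSON-decoding an error body.
    response = requests.post(API_URL, headers=headers, json=payload)
    response.raise_for_status()
    return response.json()

output = query({"inputs": "Hello!", "parameters": {"max_new_tokens": 64}})
print(output[0]["generated_text"])  # Assumes a text-generation response shape.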