FM-1976 committed on
Commit
20b41d7
·
verified ·
1 Parent(s): e097e35

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +194 -0
app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from llama_cpp import Llama
3
+ import warnings
4
+ warnings.filterwarnings(action='ignore')
5
+ import datetime
6
+ import random
7
+ import string
8
+ from time import sleep
9
+ import tiktoken
10
+
11
# Tokenizer used to count tokens in the prompt and in the reply,
# e.g. context_count = len(encoding.encode(yourtext)).
# NOTE(review): r50k_base is presumably only an approximation for the Gemma
# model loaded further down -- confirm the counts are meant as estimates.
encoding = tiktoken.get_encoding("r50k_base")

nCTX = 8192
sTOPS = ['<eos>']
modelname = "Gemma2-2B-it"

# Set the webpage title
st.set_page_config(
    page_title=f"Your LocalGPT ✨ with {modelname}",
    page_icon="🌟",
    layout="wide",
)

# Initial value for every session-state key the app relies on. The dict is
# rebuilt on each script run, so the empty chat-history list is never shared.
_SESSION_DEFAULTS = {
    "hf_model": "Gemma2-2B-it",
    "messages": [],      # chat history
    "repeat": 1.35,
    "temperature": 0.1,
    "maxlength": 500,
    "speed": 0.0,
    "numOfTurns": 0,
    "maxTurns": 5,       # must be an odd number, greater than or equal to 5
}

# Seed only the keys that are missing; existing values survive reruns.
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
47
+
48
def writehistory(filename, text):
    """Append ``text`` followed by a newline to the log file ``filename``.

    The file is opened in append mode with UTF-8 encoding, so it is created
    on first use and earlier entries are kept.

    Args:
        filename: Path of the log file to append to.
        text: Entry to record; a trailing newline is added after it.
    """
    # The ``with`` block closes the file on exit, so no explicit close() call
    # is needed.
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(text)
        f.write('\n')
53
+
54
def genRANstring(n):
    """Return a random string of ``n`` uppercase ASCII letters and digits.

    Args:
        n: Number of characters to generate.

    Returns:
        A string of length ``n`` whose characters are drawn, with
        replacement, from ``A-Z`` and ``0-9``.
    """
    # NOTE: built on ``random``, which is fine for naming a log file but is
    # not meant for security-sensitive tokens.
    alphabet = string.ascii_uppercase + string.digits
    return ''.join(random.choices(alphabet, k=n))
62
+
63
@st.cache_resource
def create_chat():
    """Load the Gemma-2 2B instruct GGUF model and return the llama.cpp client.

    The model file is fetched from the ``bartowski/gemma-2-2b-it-GGUF``
    Hugging Face repository through ``Llama.from_pretrained``. Because the
    function is wrapped in ``st.cache_resource``, Streamlit is expected to
    build the client once and reuse the same object on later reruns.

    Returns:
        The ``Llama`` instance used for chat completions.
    """
    # Announce the load before it starts so the console message matches what
    # is actually happening.
    print('loading gemma-2-2b-it-Q5_K_M.gguf with LlamaCPP...')
    # NOTE(review): temperature / max_tokens / repeat_penalty / stop look like
    # generation-time settings, and the chat-completion call further down
    # passes its own values -- confirm they have any effect at load time.
    client = Llama.from_pretrained(
        repo_id="bartowski/gemma-2-2b-it-GGUF",
        filename="gemma-2-2b-it-Q5_K_M.gguf",
        temperature=0.24,
        n_ctx=nCTX,
        max_tokens=600,
        repeat_penalty=1.176,
        stop=sTOPS,
        verbose=False,
    )
    return client
79
+
80
+
81
# Create the session's log file the first time the script runs
if "logfilename" not in st.session_state:
    # Log file name: five random characters plus a fixed suffix
    st.session_state.logfilename = f'{genRANstring(5)}_log.txt'
    # Seed the log with a timestamped header and the assistant's greeting
    log_header = f'{datetime.datetime.now()}\n\nYour own LocalGPT with πŸŒ€ {modelname}\n---\n🧠🫑: You are a helpful assistant.'
    log_greeting = 'πŸŒ€: How may I help you today?'
    for log_entry in (log_header, log_greeting):
        writehistory(st.session_state.logfilename, log_entry)
89
+
90
+
91
# AVATARS: image URLs shown next to user and assistant chat messages
av_us = 'https://github.com/fabiomatricardi/Gemma2-2b-it-chatbot/raw/main/images/user.png'
av_ass = 'https://github.com/fabiomatricardi/Gemma2-2b-it-chatbot/raw/main/images/assistant2.png'

### START STREAMLIT UI
# Header: banner image followed by a one-line description of the model setup
# NOTE(review): `use_column_width` may be deprecated in newer Streamlit
# releases -- check against the pinned version before changing it.
st.image('https://github.com/fabiomatricardi/Gemma2-2b-it-chatbot/raw/main/images/Gemma-2-Banner.original.jpg', use_column_width=True)
mytitle = f'> *🌟 {modelname} with {nCTX} tokens Context window* - Turn based Chat available with max capacity of :orange[**{st.session_state.maxTurns} messages**].'
st.markdown(mytitle, unsafe_allow_html=True)

# CREATE THE SIDEBAR
with st.sidebar:
    st.image('https://github.com/fabiomatricardi/Gemma2-2b-it-chatbot/raw/main/images/banner.png', use_column_width=True)
    # Generation settings; each widget value is written to session state on
    # every run and read back by the chat-completion call.
    st.session_state.temperature = st.slider(
        'Temperature:', min_value=0.0, max_value=1.0, value=0.65, step=0.01)
    st.session_state.maxlength = st.slider(
        'Length reply:', min_value=150, max_value=2000, value=550, step=50)
    st.session_state.repeat = st.slider(
        'Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.176, step=0.02)
    st.session_state.turns = st.toggle(
        'Turn based', value=False,
        help='Activate Conversational Turn Chat with History',
        disabled=False, label_visibility="visible")
    st.markdown(f"*Number of Max Turns*: {st.session_state.maxTurns}")
    # Placeholders that the chat handler updates while a reply is generated
    actualTurns = st.markdown("*Chat History Lenght*: :green[Good]")
    statspeed = st.markdown(f'πŸ’« speed: {st.session_state.speed} t/s')
    btnClear = st.button("Clear History", type="primary", use_container_width=True)
    if btnClear:
        # The button result was previously ignored, so clicking it did
        # nothing. Reset the stored conversation so the history rendering
        # below starts empty and the next prompt begins a fresh chat.
        st.session_state.messages = []
        st.session_state.numOfTurns = 0
    st.markdown(f"**Logfile**: {st.session_state.logfilename}")

# Model client (cached by create_chat's decorator)
llm = create_chat()
119
+
120
# Replay the stored conversation on every rerun: user messages get the user
# avatar, every other role gets the assistant avatar.
for message in st.session_state.messages:
    role_avatar = av_us if message["role"] == "user" else av_ass
    with st.chat_message(message["role"], avatar=role_avatar):
        st.markdown(message["content"])
128
def _count_tokens(text):
    """Return the number of tokens the module-level ``encoding`` yields for ``text``."""
    # disallowed_special=() makes tiktoken encode special-token text such as
    # "<|endoftext|>" as ordinary text instead of raising ValueError.
    return len(encoding.encode(text, disallowed_special=()))


def _tokens_per_second(token_count, seconds):
    """Return ``token_count / seconds``, or 0.0 when no time has elapsed."""
    return token_count / seconds if seconds > 0 else 0.0


# Accept user input
if myprompt := st.chat_input("What is an AI model?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": myprompt})
    st.session_state.numOfTurns = len(st.session_state.messages)
    # Display user message in chat message container
    with st.chat_message("user", avatar=av_us):
        st.markdown(myprompt)
    usertext = f"user: {myprompt}"
    writehistory(st.session_state.logfilename, usertext)
    # Display assistant response in chat message container
    with st.chat_message("assistant", avatar=av_ass):
        message_placeholder = st.empty()
        with st.spinner("Thinking..."):
            start = datetime.datetime.now()
            # Turn-based mode sends the last `maxTurns` messages (or the whole
            # history when it is shorter); otherwise only the newest user
            # message is sent.
            if st.session_state.turns:
                if st.session_state.numOfTurns > st.session_state.maxTurns:
                    conv_messages = st.session_state.messages[-st.session_state.maxTurns:]
                    actualTurns.markdown("*Chat History Lenght*: :red[Trimmed]")
                else:
                    conv_messages = st.session_state.messages
            else:
                conv_messages = [st.session_state.messages[-1]]
            # The prompt does not change while streaming, so count it once.
            prompttokens = _count_tokens(myprompt)
            full_response = ""
            for chunk in llm.create_chat_completion(
                messages=conv_messages,
                temperature=st.session_state.temperature,
                repeat_penalty=st.session_state.repeat,
                stop=sTOPS,
                max_tokens=st.session_state.maxlength,
                stream=True,
            ):
                # Some chunks carry no "content" in their delta; skip those.
                # Only the lookup is guarded: a bare `except:` would also
                # swallow unrelated errors and BaseException-derived signals
                # such as KeyboardInterrupt.
                try:
                    piece = chunk["choices"][0]["delta"]["content"]
                except (KeyError, IndexError, TypeError):
                    continue
                if not piece:
                    continue
                full_response += piece
                message_placeholder.markdown(full_response + "🟑")
                # Live speed read-out while the reply is streaming
                delta = datetime.datetime.now() - start
                assistanttokens = _count_tokens(full_response)
                st.session_state.speed = _tokens_per_second(
                    prompttokens + assistanttokens, delta.total_seconds())
                statspeed.markdown(f'πŸ’« speed: {st.session_state.speed:.2f} t/s')

            # Final statistics for the completed reply
            delta = datetime.datetime.now() - start
            assistanttokens = _count_tokens(full_response)
            st.session_state.speed = _tokens_per_second(
                prompttokens + assistanttokens, delta.total_seconds())
            statspeed.markdown(f'πŸ’« speed: {st.session_state.speed:.3f} t/s')
        # Reply text followed by a fenced statistics footer
        toregister = full_response + (
            "\n"
            "```\n"
            f"🧾 prompt tokens: {prompttokens}\n"
            f"πŸ“ˆ generated tokens: {assistanttokens}\n"
            f"⏳ generation time: {delta}\n"
            f"πŸ’« speed: {st.session_state.speed} t/s\n"
            "```"
        )
        message_placeholder.markdown(toregister)
        asstext = f"assistant: {toregister}"
        writehistory(st.session_state.logfilename, asstext)
        # NOTE(review): the statistics footer is stored in the chat history,
        # so in turn-based mode it is sent back to the model as part of
        # earlier assistant turns -- confirm this is intended.
        st.session_state.messages.append({"role": "assistant", "content": toregister})
        st.session_state.numOfTurns = len(st.session_state.messages)
194
+