rubenroy committed on
Commit
7786282
·
verified ·
1 Parent(s): 8e79c3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -23
app.py CHANGED
@@ -12,7 +12,7 @@ model = AutoModelForCausalLM.from_pretrained(
12
  tokenizer = AutoTokenizer.from_pretrained(model_name)
13
 
14
  @spaces.GPU
15
- def generate(prompt, history):
16
  messages = [
17
  {"role": "system", "content": "You are Zurich, a 7 billion parameter Large Language model built on the Qwen 2.5 7B model developed by Alibaba Cloud, and fine-tuned by Ruben Roy. You have been fine-tuned with the GammaCorpus v2 dataset, a dataset filled with structured and filtered multi-turn conversations and was also created by Ruben Roy. You are a helpful assistant."},
18
  {"role": "user", "content": prompt}
@@ -25,7 +25,12 @@ def generate(prompt, history):
25
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
26
  generated_ids = model.generate(
27
  **model_inputs,
28
- max_new_tokens=512
 
 
 
 
 
29
  )
30
  generated_ids = [
31
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
@@ -78,34 +83,27 @@ TITLE_HTML = """
78
  font-size: 0.9rem;
79
  color: #94a3b8;
80
  }
81
- .examples-section {
82
  background: rgba(255, 255, 255, 0.05);
83
  padding: 1.5rem;
84
  border-radius: 1rem;
85
- margin-top: 1.5rem;
86
  border: 1px solid rgba(255, 255, 255, 0.1);
 
87
  }
88
- .example-card {
89
- background: rgba(37, 99, 235, 0.1);
90
- padding: 1rem;
91
- border-radius: 0.5rem;
92
- margin-bottom: 1rem;
93
- cursor: pointer;
94
- transition: all 0.2s ease;
95
- border: 1px solid rgba(37, 99, 235, 0.2);
96
- }
97
- .example-card:hover {
98
- background: rgba(37, 99, 235, 0.15);
99
- transform: translateY(-2px);
100
- }
101
- .example-title {
102
- color: #60a5fa;
103
  font-weight: 600;
104
- margin-bottom: 0.5rem;
 
 
 
105
  }
106
- .example-description {
107
  color: #94a3b8;
108
- font-size: 0.9rem;
 
109
  }
110
  </style>
111
 
@@ -154,6 +152,7 @@ TITLE_HTML = """
154
  </div>
155
  """
156
 
 
157
  examples = [
158
  ["Explain quantum computing in simple terms"],
159
  ["Write a short story about a time traveler"],
@@ -163,11 +162,60 @@ examples = [
163
  ["What are the key differences between machine learning and deep learning?"]
164
  ]
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  with gr.Blocks() as demo:
167
  gr.HTML(TITLE_HTML)
168
 
 
 
 
 
169
  chatbot = gr.ChatInterface(
170
- fn=generate,
171
  examples=examples,
172
  title="Chat with Zurich",
173
  description="Ask me anything! I'm here to help with explanations, coding, math, writing, and more.",
 
12
  tokenizer = AutoTokenizer.from_pretrained(model_name)
13
 
14
  @spaces.GPU
15
+ def generate(prompt, history, temperature, top_p, top_k, max_new_tokens, repetition_penalty):
16
  messages = [
17
  {"role": "system", "content": "You are Zurich, a 7 billion parameter Large Language model built on the Qwen 2.5 7B model developed by Alibaba Cloud, and fine-tuned by Ruben Roy. You have been fine-tuned with the GammaCorpus v2 dataset, a dataset filled with structured and filtered multi-turn conversations and was also created by Ruben Roy. You are a helpful assistant."},
18
  {"role": "user", "content": prompt}
 
25
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
26
  generated_ids = model.generate(
27
  **model_inputs,
28
+ temperature=temperature,
29
+ top_p=top_p,
30
+ top_k=top_k,
31
+ max_new_tokens=max_new_tokens,
32
+ repetition_penalty=repetition_penalty,
33
+ do_sample=True if temperature > 0 else False
34
  )
35
  generated_ids = [
36
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
 
83
  font-size: 0.9rem;
84
  color: #94a3b8;
85
  }
86
+ .settings-section {
87
  background: rgba(255, 255, 255, 0.05);
88
  padding: 1.5rem;
89
  border-radius: 1rem;
90
+ margin: 1.5rem auto;
91
  border: 1px solid rgba(255, 255, 255, 0.1);
92
+ max-width: 800px;
93
  }
94
+ .settings-title {
95
+ color: #e2e8f0;
96
+ font-size: 1.25rem;
 
 
 
 
 
 
 
 
 
 
 
 
97
  font-weight: 600;
98
+ margin-bottom: 1rem;
99
+ display: flex;
100
+ align-items: center;
101
+ gap: 0.7rem;
102
  }
103
+ .parameter-info {
104
  color: #94a3b8;
105
+ font-size: 0.8rem;
106
+ margin-top: 0.25rem;
107
  }
108
  </style>
109
 
 
152
  </div>
153
  """
154
 
155
+ # Define example conversations
156
  examples = [
157
  ["Explain quantum computing in simple terms"],
158
  ["Write a short story about a time traveler"],
 
162
  ["What are the key differences between machine learning and deep learning?"]
163
  ]
164
 
165
def create_generation_settings():
    """Build the "Generation Settings" accordion and its sampling sliders.

    The sliders are created inside a ``gr.Group`` containing a
    ``gr.Accordion`` that starts collapsed (``open=False``). In this file
    the function is called from within the ``with gr.Blocks()`` layout.

    Returns:
        A 5-tuple of ``gr.Slider`` components, in this order:
        ``(temperature, top_p, top_k, max_new_tokens, repetition_penalty)``.
    """
    with gr.Group():
        with gr.Accordion("Generation Settings", open=False):
            # Range starts at 0.0; the generation code treats
            # temperature <= 0 as "do not sample".
            temperature = gr.Slider(
                minimum=0.0,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Higher values make the output more random, lower values make it more focused and deterministic",
            )
            top_p = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top P",
                info="Used for nucleus sampling - controls the cumulative probability of tokens to consider",
            )
            top_k = gr.Slider(
                minimum=1,
                maximum=100,
                value=50,
                step=1,
                label="Top K",
                info="Limits the number of tokens to consider for each step of text generation",
            )
            # Default of 512 matches the value that was hard-coded before
            # this setting became user-adjustable.
            max_new_tokens = gr.Slider(
                minimum=1,
                maximum=2048,
                value=512,
                step=1,
                label="Max New Tokens",
                info="Maximum number of tokens to generate in the response",
            )
            repetition_penalty = gr.Slider(
                minimum=1.0,
                maximum=2.0,
                value=1.1,
                step=0.1,
                label="Repetition Penalty",
                info="Higher values prevent the model from repeating the same information",
            )
    return temperature, top_p, top_k, max_new_tokens, repetition_penalty
209
+
210
  with gr.Blocks() as demo:
211
  gr.HTML(TITLE_HTML)
212
 
213
+ # Create generation settings
214
+ temperature, top_p, top_k, max_new_tokens, repetition_penalty = create_generation_settings()
215
+
216
+ # Create the chat interface with the additional parameters
217
  chatbot = gr.ChatInterface(
218
+ fn=lambda msg, history: generate(msg, history, temperature.value, top_p.value, top_k.value, max_new_tokens.value, repetition_penalty.value),
219
  examples=examples,
220
  title="Chat with Zurich",
221
  description="Ask me anything! I'm here to help with explanations, coding, math, writing, and more.",