Spaces:

rubenroy
/

Zurich-14B

Running on Zero

App Files Files Community

rubenroy committed 22 days ago

Commit

7786282

verified ·

1 Parent(s): 8e79c3b

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -23

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ model = AutoModelForCausalLM.from_pretrained(
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 @spaces.GPU
-def generate(prompt, history):
     messages = [
         {"role": "system", "content": "You are Zurich, a 7 billion parameter Large Language model built on the Qwen 2.5 7B model developed by Alibaba Cloud, and fine-tuned by Ruben Roy. You have been fine-tuned with the GammaCorpus v2 dataset, a dataset filled with structured and filtered multi-turn conversations and was also created by Ruben Roy. You are a helpful assistant."},
         {"role": "user", "content": prompt}
@@ -25,7 +25,12 @@ def generate(prompt, history):
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
     generated_ids = model.generate(
         **model_inputs,
-        max_new_tokens=512
     )
     generated_ids = [
         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
@@ -78,34 +83,27 @@ TITLE_HTML = """
         font-size: 0.9rem;
         color: #94a3b8;
     }
-    .examples-section {
         background: rgba(255, 255, 255, 0.05);
         padding: 1.5rem;
         border-radius: 1rem;
-        margin-top: 1.5rem;
         border: 1px solid rgba(255, 255, 255, 0.1);
     }
-    .example-card {
-        background: rgba(37, 99, 235, 0.1);
-        padding: 1rem;
-        border-radius: 0.5rem;
-        margin-bottom: 1rem;
-        cursor: pointer;
-        transition: all 0.2s ease;
-        border: 1px solid rgba(37, 99, 235, 0.2);
-    }
-    .example-card:hover {
-        background: rgba(37, 99, 235, 0.15);
-        transform: translateY(-2px);
-    }
-    .example-title {
-        color: #60a5fa;
         font-weight: 600;
-        margin-bottom: 0.5rem;
     }
-    .example-description {
         color: #94a3b8;
-        font-size: 0.9rem;
     }
 </style>
@@ -154,6 +152,7 @@ TITLE_HTML = """
 </div>
 """
 examples = [
     ["Explain quantum computing in simple terms"],
     ["Write a short story about a time traveler"],
@@ -163,11 +162,60 @@ examples = [
     ["What are the key differences between machine learning and deep learning?"]
 ]
 with gr.Blocks() as demo:
     gr.HTML(TITLE_HTML)
     chatbot = gr.ChatInterface(
-        fn=generate,
         examples=examples,
         title="Chat with Zurich",
         description="Ask me anything! I'm here to help with explanations, coding, math, writing, and more.",

 tokenizer = AutoTokenizer.from_pretrained(model_name)
 @spaces.GPU
+def generate(prompt, history, temperature, top_p, top_k, max_new_tokens, repetition_penalty):
     messages = [
         {"role": "system", "content": "You are Zurich, a 7 billion parameter Large Language model built on the Qwen 2.5 7B model developed by Alibaba Cloud, and fine-tuned by Ruben Roy. You have been fine-tuned with the GammaCorpus v2 dataset, a dataset filled with structured and filtered multi-turn conversations and was also created by Ruben Roy. You are a helpful assistant."},
         {"role": "user", "content": prompt}
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
     generated_ids = model.generate(
         **model_inputs,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        max_new_tokens=max_new_tokens,
+        repetition_penalty=repetition_penalty,
+        do_sample=True if temperature > 0 else False
     )
     generated_ids = [
         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
         font-size: 0.9rem;
         color: #94a3b8;
     }
+    .settings-section {
         background: rgba(255, 255, 255, 0.05);
         padding: 1.5rem;
         border-radius: 1rem;
+        margin: 1.5rem auto;
         border: 1px solid rgba(255, 255, 255, 0.1);
+        max-width: 800px;
     }
+    .settings-title {
+        color: #e2e8f0;
+        font-size: 1.25rem;
         font-weight: 600;
+        margin-bottom: 1rem;
+        display: flex;
+        align-items: center;
+        gap: 0.7rem;
     }
+    .parameter-info {
         color: #94a3b8;
+        font-size: 0.8rem;
+        margin-top: 0.25rem;
     }
 </style>
 </div>
 """
+# Example prompts offered to the user in the chat interface
 examples = [
     ["Explain quantum computing in simple terms"],
     ["Write a short story about a time traveler"],
     ["What are the key differences between machine learning and deep learning?"]
 ]
+def create_generation_settings():  # Builds the "Generation Settings" panel and returns its five slider components.
+    with gr.Group():
+        with gr.Accordion("Generation Settings", open=False):  # open=False: the panel is rendered collapsed
+            temperature = gr.Slider(  # 0.0-2.0, default 0.7; generate() turns sampling off when this is 0
+                minimum=0.0,
+                maximum=2.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature",
+                info="Higher values make the output more random, lower values make it more focused and deterministic"
+            )
+            top_p = gr.Slider(  # 0.0-1.0, default 0.9
+                minimum=0.0,
+                maximum=1.0,
+                value=0.9,
+                step=0.05,
+                label="Top P",
+                info="Used for nucleus sampling - controls the cumulative probability of tokens to consider"
+            )
+            top_k = gr.Slider(  # 1-100 in whole steps, default 50
+                minimum=1,
+                maximum=100,
+                value=50,
+                step=1,
+                label="Top K",
+                info="Limits the number of tokens to consider for each step of text generation"
+            )
+            max_new_tokens = gr.Slider(  # 1-2048 in whole steps, default 512
+                minimum=1,
+                maximum=2048,
+                value=512,
+                step=1,
+                label="Max New Tokens",
+                info="Maximum number of tokens to generate in the response"
+            )
+            repetition_penalty = gr.Slider(  # 1.0-2.0, default 1.1
+                minimum=1.0,
+                maximum=2.0,
+                value=1.1,
+                step=0.1,
+                label="Repetition Penalty",
+                info="Higher values prevent the model from repeating the same information"
+            )
+    return temperature, top_p, top_k, max_new_tokens, repetition_penalty  # same order as generate()'s parameters after (prompt, history)
 with gr.Blocks() as demo:
     gr.HTML(TITLE_HTML)
+    # Create generation settings
+    temperature, top_p, top_k, max_new_tokens, repetition_penalty = create_generation_settings()
+    # Create the chat interface with the additional parameters
     chatbot = gr.ChatInterface(
+        fn=lambda msg, history: generate(msg, history, temperature.value, top_p.value, top_k.value, max_new_tokens.value, repetition_penalty.value),
         examples=examples,
         title="Chat with Zurich",
         description="Ask me anything! I'm here to help with explanations, coding, math, writing, and more.",