BruceLee1234 committed on
Commit 9803328 · verified · 1 Parent(s): 3b0db5f

Update app.py

Files changed (1)
  1. app.py +25 -48
app.py CHANGED
@@ -1,54 +1,31 @@
- import gradio as gr
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
- # Load the HelpingAI2.5-10B model
- model = AutoModelForCausalLM.from_pretrained("HelpingAI/HelpingAI-flash")
  # Load the tokenizer
- tokenizer = AutoTokenizer.from_pretrained("HelpingAI/HelpingAI-flash")
-
- # Move model to GPU (if available) or CPU
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- model.to(device)
-
- # Define the function for generating responses
- def generate_response(user_input):
-     # Define the chat input
-     chat = [
-         { "role": "system", "content": "You are HelpingAI, an emotional AI. Always answer my questions in the HelpingAI style." },
-         { "role": "user", "content": user_input }
-     ]
-
-     # Concatenate the chat conversation into a single string with the proper format
-     chat_input = ""
-     for message in chat:
-         role = message["role"]
-         content = message["content"]
-         chat_input += f"{role}: {content}\n"
-
-     # Tokenize the input
-     inputs = tokenizer(chat_input, return_tensors="pt").to(device)
-
-     # Generate text
-     outputs = model.generate(
-         inputs["input_ids"],
-         max_new_tokens=256,
-         do_sample=True,
-         temperature=0.6,
-         top_p=0.9,
-     )
-
-     # Decode the generated text
-     response = outputs[0][inputs["input_ids"].shape[-1]:]
-     return tokenizer.decode(response, skip_special_tokens=True)
-
- # Create the Gradio interface
- iface = gr.Interface(
-     fn=generate_response,  # Function to generate response
-     inputs="text",  # Input is a text box for the user to type in
-     outputs="text",  # Output is the generated response (text)
-     live=True  # Allows updating as the user types
  )
 
- # Launch the Gradio app
- iface.launch()
 
 
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
+ # Load the HelpingAI2.5-2B model
+ model = AutoModelForCausalLM.from_pretrained("OEvortex/HelpingAI2.5-2B")
  # Load the tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("OEvortex/HelpingAI2.5-2B")
+
+ # Define the chat input
+ chat = [
+     { "role": "system", "content": "You are HelpingAI, an emotional AI. Always answer my questions in the HelpingAI style." },
+     { "role": "user", "content": "GIVE ME YOUR INTRO" }
+ ]
+
+ inputs = tokenizer.apply_chat_template(
+     chat,
+     add_generation_prompt=True,
+     return_tensors="pt"
+ ).to(model.device)
+
+ # Generate text
+ outputs = model.generate(
+     inputs,
+     max_new_tokens=256,
+     do_sample=True,
+     temperature=0.6,
+     top_p=0.9,
  )
 
+ response = outputs[0][inputs.shape[-1]:]
+ print(tokenizer.decode(response, skip_special_tokens=True))
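
After this commit, app.py is a plain script: it switches the checkpoint to OEvortex/HelpingAI2.5-2B, tokenizes the chat with apply_chat_template, generates once for a hard-coded prompt, and prints the result, while the previous gr.Interface wiring is dropped. If the Space is still meant to serve an interactive demo, the following is a minimal sketch (not part of the commit) that wraps the new chat-template generation flow back into the Gradio interface the old app.py used; the system prompt and sampling settings are carried over as-is, and moving the model to GPU is optional.

# Sketch only: combines the new apply_chat_template flow with the old Gradio UI.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("OEvortex/HelpingAI2.5-2B")
tokenizer = AutoTokenizer.from_pretrained("OEvortex/HelpingAI2.5-2B")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def generate_response(user_input):
    # Build the chat and tokenize it with the model's own chat template
    chat = [
        {"role": "system", "content": "You are HelpingAI, an emotional AI. Always answer my questions in the HelpingAI style."},
        {"role": "user", "content": user_input},
    ]
    inputs = tokenizer.apply_chat_template(
        chat, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    outputs = model.generate(
        inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # Decode only the newly generated tokens, not the prompt
    response = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(response, skip_special_tokens=True)

iface = gr.Interface(fn=generate_response, inputs="text", outputs="text")
iface.launch()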