jonathanjordan21 committed on
Commit
2ef9ba6
·
verified ·
1 Parent(s): b969bda

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -4
app.py CHANGED
@@ -1,10 +1,53 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10
  def respond(
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
+ # """
5
+ # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
+ # """
7
+ # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
+
9
+ from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM#, MambaForCausalLM
10
+ from peft import PeftConfig, PeftModel
11
+
12
# Hub repository that is used for the PEFT config, the tokenizer, the base
# model weights and the adapter weights alike.
_REPO_ID = "jonathanjordan21/mos-mamba-6x130m-trainer"

config = PeftConfig.from_pretrained(_REPO_ID)

# trust_remote_code=True permits code shipped inside the repository to run
# while loading.
tokenizer = AutoTokenizer.from_pretrained(_REPO_ID, trust_remote_code=True)

# Load the causal LM first, overriding eos_token_id with the tokenizer's value.
model = AutoModelForCausalLM.from_pretrained(
    _REPO_ID,
    eos_token_id=tokenizer.eos_token_id,
    trust_remote_code=True,
)

# Wrap the model with the adapter from the same repository (adapter_name is
# left at its default), then merge and unload it so that `model` is rebound
# to the object returned by merge_and_unload().
model = PeftModel.from_pretrained(model, _REPO_ID).merge_and_unload()
24
+
25
+
26
def invoke(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate a reply for ``message`` with the module-level model.

    The conversation is assembled as a list of role/content dicts (system
    prompt, then the prior turns from ``history``, then the new user
    message), rendered through the tokenizer's chat template and passed to
    ``model.generate``.

    Args:
        message: The new user message.
        history: Earlier turns; element 0 of each entry is the user text and
            element 1 the assistant text. Falsy entries are skipped.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``generate`` as ``max_new_tokens``.
        temperature: Accepted for interface compatibility; not forwarded to
            ``generate`` by this function.
        top_p: Accepted for interface compatibility; not forwarded to
            ``generate`` by this function.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated sequences.
    """
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    # Fix: template the whole conversation (`messages`), not just the raw
    # `message` string, so the system prompt and history are actually used.
    # return_dict=True yields a mapping that can be unpacked into generate()
    # below; without it a bare tensor is returned and `**` unpacking fails.
    # add_generation_prompt=True asks the template to open the assistant turn.
    tokens = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )

    # NOTE(review): temperature and top_p are intentionally not passed here to
    # keep the original decoding behaviour; wire them in (together with
    # do_sample=True) if sampling is wanted.
    out = model.generate(
        **tokens,
        eos_token_id=model.config.eos_token_id,
        max_new_tokens=max_tokens,
    )

    res = tokenizer.batch_decode(out)

    return res
51
 
52
 
53
  def respond(