ehristoforu committed
Commit e45a115 (verified) · Parent: 73fa276

Update app.py

Files changed (1): app.py (+4, -2)
app.py CHANGED
```diff
@@ -26,7 +26,8 @@ model_name = "datalama/EXAONE-3.5-7.8B-Instruct-Llamafied"
 
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype=torch.float16
+    torch_dtype=torch.float16,
+    trust_remote_code=True
 )
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
@@ -70,7 +71,7 @@ def generate(
     )
     conversation.append({"role": "user", "content": message})
 
-    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
+    input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
@@ -81,6 +82,7 @@ def generate(
         {"input_ids": input_ids},
         streamer=streamer,
         max_new_tokens=max_new_tokens,
+        eos_token_id=tokenizer.eos_token_id,
         do_sample=True,
         top_p=top_p,
         top_k=top_k,
```
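
For context, a minimal sketch of the loading path as it stands after this commit. The model id, dtype, and trust_remote_code flag come from the diff; everything else follows transformers defaults and is not part of the change:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "datalama/EXAONE-3.5-7.8B-Instruct-Llamafied"

# trust_remote_code=True lets transformers execute custom modeling code
# shipped in a model repo; repos that require it refuse to load otherwise.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,   # half precision, as in the diff
    trust_remote_code=True,      # added by this commit
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
```

Since this is a Llamafied checkpoint, it should load with the stock Llama classes anyway, so the flag likely acts as a safeguard rather than changing behavior.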
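And a sketch of the generation path the other two hunks touch, continuing from the loading sketch above. The rest of app.py is not visible on this page, so the constant's value, the sampling settings, and the background-thread/streamer pattern are assumptions flagged in the comments; only the apply_chat_template call, the truncation logic, and the generate kwargs mirror the diff:

```python
from threading import Thread
from transformers import TextIteratorStreamer

MAX_INPUT_TOKEN_LENGTH = 4096  # assumed value; the diff only uses the constant

conversation = [{"role": "user", "content": "Hello!"}]

# tokenize=True (also the default) makes the template return token ids,
# so return_tensors="pt" yields a tensor ready for model.generate.
input_ids = tokenizer.apply_chat_template(
    conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt"
)
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
    # keep only the most recent tokens, as app.py does
    input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(
    # .to(model.device) is assumed; the page does not show where app.py does this
    {"input_ids": input_ids.to(model.device)},
    streamer=streamer,
    max_new_tokens=256,                   # assumed; app.py passes max_new_tokens
    eos_token_id=tokenizer.eos_token_id,  # new in this commit
    do_sample=True,
    top_p=0.9,                            # assumed sampling values
    top_k=50,
)
Thread(target=model.generate, kwargs=generate_kwargs).start()
for text_chunk in streamer:
    print(text_chunk, end="", flush=True)
```

Passing eos_token_id explicitly makes generation stop at the tokenizer's end-of-sequence token even if the checkpoint's generation_config leaves it unset, which is plausibly the motivation for the change.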