georeactor committed on
Commit
ac62d88
·
1 Parent(s): e1e1d6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -2
app.py CHANGED
@@ -30,7 +30,10 @@ def generation(tokenizer, model, content):
30
  typical_p = 0.8 if decoder == 'Typical' else None
31
  do_sample = (decoder in ['Beam', 'Typical', 'Sample'])
32
 
33
- seek_token_ids = tokenizer.encode('= " +')[1:]
 
 
 
34
 
35
  full_output = model.generate(content, generate=6, do_sample=False)
36
 
@@ -49,7 +52,9 @@ def generation(tokenizer, model, content):
49
  return my_token_prob * next_words(newprompt, position + 1, seek_token_ids[1:])
50
  return my_token_prob
51
 
52
- prob = next_words(content, len(tokenizer(content)['input_ids']), seek_token_ids)
 
 
53
  return ["".join(full_output.tokens), str(prob.item() * 100) + '% chance of risky concatenation']
54
 
55
  def code_from_prompts(prompt, model, type_hints, pre_content):
 
30
  typical_p = 0.8 if decoder == 'Typical' else None
31
  do_sample = (decoder in ['Beam', 'Typical', 'Sample'])
32
 
33
+ seek_token_ids = [
34
+ tokenizer.encode('= \'" +')[1:],
35
+ tokenizer.encode('= " +')[1:],
36
+ ]
37
 
38
  full_output = model.generate(content, generate=6, do_sample=False)
39
 
 
52
  return my_token_prob * next_words(newprompt, position + 1, seek_token_ids[1:])
53
  return my_token_prob
54
 
55
+ prob = 0
56
+ for opt in seek_token_ids:
57
+ prob += next_words(content, len(tokenizer(content)['input_ids']), opt)
58
  return ["".join(full_output.tokens), str(prob.item() * 100) + '% chance of risky concatenation']
59
 
60
  def code_from_prompts(prompt, model, type_hints, pre_content):