EmicoBinsfinder committed
Commit e610ece · 1 Parent(s): d6b5ec6

Update app.py

Files changed (1): app.py (+40, −5)
app.py CHANGED
@@ -7,7 +7,6 @@ import torch.nn as nn
 import transformers
 from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM, LlamaTokenizer, GenerationConfig
 
-
 auth_token = os.environ.get("AUTH_TOKEN_SECRET")
 
 tokenizer = LlamaTokenizer.from_pretrained("Claimed/capybara", use_auth_token=auth_token)
@@ -19,7 +18,44 @@ model = LlamaForCausalLM.from_pretrained(
 
 #model = model.to('cuda')
 
-
+def broad_scope_class_predictor(class_embeddings, abstract_embedding, N=5, Sensitivity='Medium'):
+    """
+    Takes in pre-computed class embeddings and a single abstract embedding, and scores the abstract against every class by cosine similarity.
+    :param class_embeddings: dataframe of class embeddings
+    :param abstract_embedding: a single abstract embedding
+    :param N: N highest-matching classes to return, from highest to lowest; default is 5
+    :return: HighestSimilarity: dataframe of the N most similar classes out of the 9500+ scored
+    """
+    predictions = pd.DataFrame(columns=['Class Name', 'Score'])
+    for i in range(len(class_embeddings)):
+        class_name = class_embeddings.iloc[i, 0]
+        embedding = class_embeddings.iloc[i, 2]
+        embedding = convert_saved_embeddings(embedding)
+        abstract_embedding = abstract_embedding.numpy()
+        abstract_embedding = torch.from_numpy(abstract_embedding)
+        cos = torch.nn.CosineSimilarity(dim=1)
+        score = cos(abstract_embedding, embedding).numpy().tolist()
+        result = [class_name, score[0]]
+        predictions.loc[len(predictions)] = result
+    greenpredictions = predictions.tail(52)
+    if Sensitivity == 'High':
+        Threshold = 0.5
+    elif Sensitivity == 'Medium':
+        Threshold = 0.40
+    elif Sensitivity == 'Low':
+        Threshold = 0.35
+    GreenLikelihood = 'False'
+    for i in range(len(greenpredictions)):
+        score = greenpredictions.iloc[i, 1]
+        if float(score) >= Threshold:
+            GreenLikelihood = 'True'
+            break
+        else:
+            continue
+    HighestSimilarity = predictions.nlargest(N, ['Score'])
+
+    return HighestSimilarity
+
 def add_text(history, text):
     history = history + [(text, None)]
     return history, ""
@@ -42,12 +78,11 @@ def classifier(userin):
     in_emb = classification.sentence_embedder(clean_in, 'Model_bert')
 
     Number = 10
-    broad_scope_predictions = classification.broad_scope_class_predictor(class_embeddings, in_emb, Number, Sensitivity='High')
+    broad_scope_predictions = broad_scope_class_predictor(class_embeddings, in_emb, Number, Sensitivity='High')
 
-    return broad_scope_predictions[1]
+    return broad_scope_predictions
 
 
-
 def generateresponse(history):#, task):
     """
     Model definition here:
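For context, below is a minimal sketch of how the new in-file broad_scope_class_predictor could be exercised. The dataframe layout (class name in column 0, embedding in column 2) is taken from the function body; the toy class names, toy embeddings, and the inline stand-in for convert_saved_embeddings are illustrative assumptions, not part of this commit.

import pandas as pd
import torch

# Toy stand-ins: three classes with 4-dim embeddings, stored the way the
# function indexes them (name at iloc[:, 0], embedding at iloc[:, 2]).
class_embeddings = pd.DataFrame({
    'Class Name': ['Y02E', 'Y02T', 'Y02W'],
    'Description': ['', '', ''],
    'Embedding': [torch.randn(1, 4) for _ in range(3)],
})

def convert_saved_embeddings(embedding):
    # Assumption: in app.py this deserialises a stored embedding into a
    # (1, dim) torch tensor; the toy data above is already in that shape.
    return embedding

abstract_embedding = torch.randn(1, 4)

# Returns a dataframe of the N classes most similar to the abstract,
# ranked by cosine similarity.
top = broad_scope_class_predictor(class_embeddings, abstract_embedding, N=2, Sensitivity='High')
print(top)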
 
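One behaviour worth flagging in the new function, separate from the diff itself: if Sensitivity is any value other than 'High', 'Medium', or 'Low', Threshold is never assigned and the later comparison float(score) >= Threshold raises a NameError; GreenLikelihood is also computed but never returned. A guarded lookup along these lines (a suggestion, not part of the commit) would fail fast with a clearer message:

# Suggested defensive variant of the threshold selection (illustrative only).
THRESHOLDS = {'High': 0.5, 'Medium': 0.40, 'Low': 0.35}

def select_threshold(sensitivity):
    # Raise a descriptive error up front instead of a NameError mid-loop.
    try:
        return THRESHOLDS[sensitivity]
    except KeyError:
        raise ValueError(f"Sensitivity must be one of {sorted(THRESHOLDS)}, got {sensitivity!r}")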