EmicoBinsfinder committed on
Commit
1423dfb
·
1 Parent(s): e610ece

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -54,6 +54,22 @@ def broad_scope_class_predictor(class_embeddings, abstract_embedding, N=5, Sensi
54
  continue
55
  HighestSimilarity = predictions.nlargest(N, ['Score'])
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  return HighestSimilarity
58
 
59
  def add_text(history, text):
@@ -75,14 +91,13 @@ class_embeddings = pd.read_csv('Embeddings/MainClassEmbeddings.csv')
75
 
76
  def classifier(userin):
77
  clean_in = classification.clean_data(userin, type='String')
78
- in_emb = classification.sentence_embedder(clean_in, 'Model_bert')
79
 
80
  Number = 10
81
  broad_scope_predictions = broad_scope_class_predictor(class_embeddings, in_emb, Number, Sensitivity='High')
82
 
83
  return broad_scope_predictions
84
 
85
-
86
  def generateresponse(history):#, task):
87
  """
88
  Model definition here:
 
54
  continue
55
  HighestSimilarity = predictions.nlargest(N, ['Score'])
56
 
57
# Cache of loaded (tokenizer, model) pairs keyed by model path, so repeated
# calls do not reload the model weights from disk on every user query.
_embedder_cache = {}

def sentence_embedder(sentences, model_path):
    """
    Call the sentence-similarity model to generate an embedding of input text.

    :param sentences: input text as a string (or list of strings accepted by the tokenizer)
    :param model_path: path to the HuggingFace text-similarity model
    :return: a (1, 384) tensor embedding of the input text
    """
    # Load the tokenizer/model only once per model_path — instantiating
    # AutoModel.from_pretrained on every call re-reads the full weights
    # from disk, which dominated runtime in the original implementation.
    if model_path not in _embedder_cache:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        # from_tf=True: the checkpoint on disk is TensorFlow-format — TODO confirm
        model = AutoModel.from_pretrained(model_path, from_tf=True)
        model.eval()  # inference only; disables dropout etc.
        _embedder_cache[model_path] = (tokenizer, model)
    tokenizer, model = _embedder_cache[model_path]

    encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
    # Compute token embeddings without tracking gradients (inference only).
    with torch.no_grad():
        model_output = model(**encoded_input)
    # mean_pooling (defined elsewhere in this file) collapses the token
    # embeddings into a single (1, 384) sentence representation.
    sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
    return sentence_embeddings
73
  return HighestSimilarity
74
 
75
  def add_text(history, text):
 
91
 
92
def classifier(userin):
    """
    Classify raw user text into the N most likely patent classes.

    :param userin: raw user input text
    :return: the broad-scope class predictions for the cleaned, embedded input
    """
    # Normalise the raw text, then embed it with the local similarity model.
    cleaned = classification.clean_data(userin, type='String')
    embedding = sentence_embedder(cleaned, 'Model_bert')

    top_n = 10
    predictions = broad_scope_class_predictor(
        class_embeddings, embedding, top_n, Sensitivity='High'
    )
    return predictions
100
 
 
101
  def generateresponse(history):#, task):
102
  """
103
  Model definition here: