vives committed on
Commit
6a4d8b5
·
1 Parent(s): 990c692

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -40,20 +40,6 @@ def get_transcript(file):
40
  transcript = data['results'].values[1][0]['transcript']
41
  transcript = transcript.lower()
42
  return transcript
43
- #
44
- """preprocess tags"""
45
- if tags:
46
- tags = [x.lower().strip() for x in tags.split(",")]
47
- tags_tokens = concat_tokens(tags)
48
- tags_tokens.pop("KPS")
49
- with torch.no_grad():
50
- outputs_tags = model(**tags_tokens)
51
- pools_tags = pool_embeddings(outputs_tags, tags_tokens).detach().numpy()
52
- token_dict = {}
53
- for tag,embedding in zip(tags,pools_tags):
54
- token_dict[tag] = embedding
55
-
56
- """Code related with processing text, extracting KPs, and doing distance to tag"""
57
  def concat_tokens(sentences):
58
  tokens = {'input_ids': [], 'attention_mask': [], 'KPS': {}}
59
  for sentence, values in sentences.items():
@@ -70,6 +56,21 @@ def concat_tokens(sentences):
70
  tokens['attention_mask'] = torch.stack(tokens['attention_mask'])
71
  return tokens
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  def calculate_weighted_embed_dist(out, tokens, weight, text,kp_dict, idx, exclude_text=False,exclude_words=False):
74
  sim_dict = {}
75
  pools = pool_embeddings_count(out, tokens, idx).detach().numpy()
 
40
  transcript = data['results'].values[1][0]['transcript']
41
  transcript = transcript.lower()
42
  return transcript
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def concat_tokens(sentences):
44
  tokens = {'input_ids': [], 'attention_mask': [], 'KPS': {}}
45
  for sentence, values in sentences.items():
 
56
  tokens['attention_mask'] = torch.stack(tokens['attention_mask'])
57
  return tokens
58
 
59
+ """preprocess tags"""
60
+ if tags:
61
+ tags = [x.lower().strip() for x in tags.split(",")]
62
+ tags_tokens = concat_tokens(tags)
63
+ tags_tokens.pop("KPS")
64
+ with torch.no_grad():
65
+ outputs_tags = model(**tags_tokens)
66
+ pools_tags = pool_embeddings(outputs_tags, tags_tokens).detach().numpy()
67
+ token_dict = {}
68
+ for tag,embedding in zip(tags,pools_tags):
69
+ token_dict[tag] = embedding
70
+
71
+ """Code related with processing text, extracting KPs, and doing distance to tag"""
72
+
73
+
74
  def calculate_weighted_embed_dist(out, tokens, weight, text,kp_dict, idx, exclude_text=False,exclude_words=False):
75
  sim_dict = {}
76
  pools = pool_embeddings_count(out, tokens, idx).detach().numpy()