lllchenlll committed on
Commit
eb90fa0
·
1 Parent(s): 44d5661

Add application file

Browse files
Files changed (2) hide show
  1. tag_data/cal.py +22 -0
  2. tag_data/tags.txt +0 -0
tag_data/cal.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sentence_transformers import SentenceTransformer
3
+
4
+
5
# Pre-computes sentence embeddings for every tag in tags.txt, together with
# each embedding's Euclidean (L2) norm, and stores both as .npy files.

MODEL_NAME = "hfl/chinese-roberta-wwm-ext-large"


def load_tags(path='tags.txt'):
    """Read one tag per line from *path*.

    Surrounding whitespace is stripped from every line. Blank lines are kept
    (as empty strings) so that the index of a tag always equals its line
    index in the file, and therefore its row index in the saved arrays.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's locale encoding.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f]


def embedding_norms(embeddings):
    """Return the L2 norm of every row of a 2-D embedding matrix.

    Args:
        embeddings: array-like of shape (n_items, dim).

    Returns:
        A 1-D ``numpy.ndarray`` of length ``n_items``.

    Raises:
        ValueError: if *embeddings* is not two-dimensional (for example when
            no tags were encoded at all).
    """
    matrix = np.asarray(embeddings)
    if matrix.ndim != 2:
        raise ValueError(
            "expected a 2-D (n_items, dim) embedding matrix, got shape "
            f"{matrix.shape}"
        )
    # Vectorised equivalent of sqrt(dot(row, row)) for each row; returning an
    # ndarray (not a list) is what makes `.shape` and `np.save` well-defined.
    return np.linalg.norm(matrix, axis=1)


def main():
    """Encode all tags and write ./tags_embed.npy and ./tags_dis.npy."""
    encoder = SentenceTransformer(MODEL_NAME)

    tags = load_tags('tags.txt')

    # NOTE(review): relies on encode() returning a numpy array (the code
    # reads `.shape` from it) -- confirm against the installed version.
    tags_embed = encoder.encode(tags)
    tags_dis = embedding_norms(tags_embed)

    print(tags_embed.shape, tags_dis.shape)

    with open('./tags_embed.npy', 'wb') as f:
        np.save(f, tags_embed)

    with open('./tags_dis.npy', 'wb') as f:
        np.save(f, tags_dis)


if __name__ == "__main__":
    main()
tag_data/tags.txt ADDED
The diff for this file is too large to render. See raw diff