Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import gradio.inputs as grinputs
|
3 |
+
import gradio.outputs as groutputs
|
4 |
+
|
5 |
+
from gensim.models import KeyedVectors
|
6 |
+
from gensim.parsing import preprocessing
|
7 |
+
|
8 |
+
# Preprocessing pipeline handed to ``preprocessing.preprocess_string`` in
# ``clean_words``; the filters are applied in the order listed.
# NOTE(review): ``remove_stopwords`` appears twice (before and after the
# strip_* steps) and lowercasing runs last, so stop-word matching happens on
# the original casing -- confirm both are intentional.
filters = [
    preprocessing.remove_stopwords,
    preprocessing.strip_tags,
    preprocessing.strip_punctuation,
    preprocessing.strip_numeric,
    preprocessing.strip_multiple_whitespaces,
    preprocessing.strip_non_alphanum,
    preprocessing.strip_short,
    preprocessing.remove_stopwords,
    preprocessing.lower_to_unicode,
]
|
19 |
+
|
20 |
+
def parse_text(text):
    """Split a user-supplied word list into individual words.

    Commas and semicolons are both accepted as separators. All whitespace
    (spaces, tabs, newlines) is removed before splitting, and empty entries
    produced by blank input or by leading/trailing/doubled separators are
    dropped so callers never receive ``""`` as a word.

    Args:
        text: raw string typed by the user, e.g. ``"mot1, mot2; mot3"``.

    Returns:
        list of non-empty word strings, in input order (possibly empty).
    """
    # ``str.split()`` with no argument splits on any whitespace run, so the
    # join removes every whitespace character, not just " ".
    compact = "".join(text.split()).replace(";", ",")
    return [token for token in compact.split(",") if token]
|
22 |
+
|
23 |
+
def clean_words(words):
    """Normalise scored words and drop variants of words already kept.

    Each word is run through ``preprocessing.preprocess_string`` with the
    module-level ``filters``; only the first resulting token is used. A word
    is skipped when preprocessing leaves nothing, or when it starts or ends
    with a word that has already been kept (this also covers exact
    duplicates, so the first score seen for a word wins).

    Args:
        words: iterable of ``(word, score)`` pairs, processed in order.

    Returns:
        dict mapping each kept word to its score rounded to two decimals,
        in the order the words were kept.
    """
    clean_dict = {}
    for word, score in words:
        tokens = preprocessing.preprocess_string(word, filters=filters)
        if not tokens:
            # Everything was filtered out; nothing to keep.
            continue
        word = tokens[0]
        # Reject prefix/suffix variants of an already accepted word.
        if any(word.startswith(kept) or word.endswith(kept) for kept in clean_dict):
            continue
        clean_dict[word] = round(score, 2)
    return clean_dict
|
33 |
+
|
34 |
+
# Word vectors are loaded once, at import time, from a word2vec-format file
# located relative to the working directory.
# NOTE(review): the file name suggests 300-dimensional French vectors --
# confirm against the deployed asset.
path = "cc.fr.300.vec"
m = KeyedVectors.load_word2vec_format(path)
|
36 |
+
|
37 |
+
def on_submit(text, mode):
    """Handle a form submission and return the related words as text.

    Args:
        text: raw word list typed by the user (comma/semicolon separated).
        mode: ``'Close'`` to list the nearest neighbours; any other value
            selects the "far" behaviour.

    Returns:
        A string of ``'word': score`` pairs separated by ``", "`` (the
        ``str`` of the dict from ``clean_words`` without its braces), or a
        short message when the input is empty or contains words missing
        from the model vocabulary.
    """
    print('{} mode'.format(mode))
    # Drop empty entries so blank input or stray separators do not reach
    # the vector lookup.
    positive = [word for word in parse_text(text) if word]
    if not positive:
        return "Please enter at least one word."
    # Looking up a word that is not in the vocabulary would raise inside
    # ``most_similar``; report it to the user instead.
    unknown = [word for word in positive if word not in m]
    if unknown:
        return "Unknown word(s): {}".format(", ".join(unknown))
    if mode == 'Close':
        words = m.most_similar(positive=positive, topn=200)
    else:
        # Take a much deeper neighbour list and reverse it so the least
        # similar of those 10000 entries come first.
        words = m.most_similar(positive=positive, topn=10000)[::-1]
    return str(clean_words(words))[1:-1]
|
45 |
+
|
46 |
+
# Web UI: one textbox for the input words and a radio button for the mode,
# wired to ``on_submit``; the result is shown in a single output textbox.
iface = gr.Interface(
    fn=on_submit,
    inputs=[
        grinputs.Textbox(
            placeholder='mot1, mot2, mot3, ...',
            label="Input words (comma separated)",
        ),
        grinputs.Radio(['Close', 'Far'], label="Close or Far mode"),
    ],
    outputs=[
        groutputs.Textbox(label='Information'),
    ],
    allow_screenshot=False,
    allow_flagging=False,
)
# Start serving the interface as soon as the script is executed.
iface.launch()
|