|
import re

import gradio as gr
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
|
# Hub identifier of the checkpoint that provides both the tokenizer and the classifier.
BASE_MODEL = "AlekseyDorkin/xlm-roberta-en-ru-emoji"

# Default number of emoji predictions returned per input; also the number of output boxes in the UI.
TOP_N = 5

# Both objects are loaded once at import time and shared by every request.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL)
|
|
|
def preprocess(text):
    """Mask user mentions and links in ``text``.

    Every whitespace-delimited token that starts with ``@`` and is longer
    than one character is replaced by ``@user``; every token that starts
    with ``http`` is replaced by ``http``. All other tokens are kept as-is.

    Tokens are delimited by any run of whitespace (spaces, tabs, newlines),
    and that whitespace is preserved verbatim in the result, so multi-line
    input keeps its layout and a mention/link at the end of a line does not
    swallow the text that follows it.

    Args:
        text: The raw input string.

    Returns:
        The input with mentions and links masked.
    """
    def _mask(token):
        if len(token) > 1 and token.startswith('@'):
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    # The capturing group keeps the separators in the split result, so
    # joining the pieces with "" reproduces the original spacing exactly.
    # Whitespace pieces never match either mask rule and pass through.
    pieces = re.split(r"(\s+)", text)
    return "".join(_mask(piece) for piece in pieces)
|
|
|
def get_top_emojis(text, top_n=TOP_N):
    """Return the labels of the ``top_n`` highest-scoring classes for ``text``.

    The text is passed through ``preprocess``, tokenized, and scored by the
    module-level ``model``; class ids are mapped to labels through
    ``model.config.id2label``.

    Args:
        text: The raw input string.
        top_n: How many labels to return, best first. Defaults to ``TOP_N``.

    Returns:
        A list of at most ``top_n`` labels ordered from most to least likely.
    """
    preprocessed = preprocess(text)

    # truncation=True keeps long pasted text within the model's input limit
    # instead of failing inside the forward pass.
    inputs = tokenizer(preprocessed, return_tensors="pt", truncation=True)

    # Inference only, so no autograd graph is needed.
    with torch.no_grad():
        logits = model(**inputs).logits
        # torch's softmax takes ``dim`` (not numpy's ``axis``). A single
        # input string is scored as a batch of one, so [0] selects its row
        # and leaves a 1-D vector of per-class probabilities.
        scores = torch.nn.functional.softmax(logits, dim=-1)[0].numpy()

    # argsort is ascending; reverse to get the best classes first.
    ranking = np.argsort(scores)[::-1][:top_n]

    # Convert numpy integers to plain ints for the id2label lookup.
    return [model.config.id2label[int(index)] for index in ranking]
|
|
|
|
|
# Web UI: one multi-line text input and TOP_N text outputs, one per predicted label.
# NOTE(review): gr.inputs / gr.outputs are the legacy Gradio component
# namespaces — confirm the pinned Gradio version still provides them.
gradio_ui = gr.Interface(
    fn=get_top_emojis,
    # Title and description describe what this app does: emoji prediction
    # (they previously described an unrelated review-rating demo).
    title="Predicting emojis for English and Russian text",
    description="Enter some text in English or Russian and check which emojis the model predicts for it.",
    inputs=[
        gr.inputs.Textbox(lines=5, label="Paste some text here"),
    ],
    # get_top_emojis returns TOP_N labels by default, so one box per label.
    outputs=[
        gr.outputs.Textbox(label=f"№{i}") for i in range(TOP_N)
    ],
    examples=[
        ["Awesome!"], ["Круто!"], ["lol"]
    ],
)

# Start the web server; debug=True is passed through to Gradio's launch().
gradio_ui.launch(debug=True)