Update app.py
Browse files
app.py
CHANGED
@@ -20,80 +20,130 @@ def fill_mask_for_languages(sentences):
|
|
20 |
return results
|
21 |
|
22 |
def replace_mask(sentence, predicted_word):
|
23 |
-
return sentence.replace("____", predicted_word)
|
24 |
|
25 |
st.title("Fill Mask for Multiple Languages | Zabantu-XLM-Roberta")
|
26 |
st.write("This app predicts the missing word for sentences in Zulu, Tshivenda, Sepedi, Tswana, and Tsonga using a Zabantu BERT model.")
|
|
|
27 |
|
28 |
-
|
29 |
-
f"'{lang}': '{sentence}'," for lang, sentence in sample_sentences.items()
|
30 |
-
))
|
31 |
|
32 |
-
|
33 |
-
|
|
|
|
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
# st.write(user_predictions)
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
css = """
|
54 |
<style>
|
55 |
-
footer {display:none !important}
|
56 |
-
|
57 |
-
.
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
}
|
71 |
-
|
72 |
-
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
}
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
padding: 0.75em;
|
79 |
-
border-radius: 10px;
|
80 |
-
font-size: 16px;
|
81 |
-
width: 100%;
|
82 |
}
|
83 |
-
|
84 |
-
|
85 |
-
border-color: #17152e;
|
86 |
-
outline: none;
|
87 |
-
box-shadow: 0px 0px 5px rgba(23, 21, 46, 0.5);
|
88 |
}
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
92 |
}
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
}
|
98 |
</style>
|
99 |
"""
|
|
|
20 |
return results
|
21 |
|
22 |
def replace_mask(sentence, predicted_word):
    """Return *sentence* with every ``____`` placeholder filled by the prediction.

    The predicted word is wrapped in ``**`` so that it renders in bold when
    the result is passed to a Markdown-aware renderer.

    Args:
        sentence: Text containing zero or more ``____`` placeholders.
        predicted_word: The word to substitute for each placeholder.

    Returns:
        A new string; *sentence* is returned unchanged if it contains no
        ``____`` placeholder.
    """
    # The braces matter: without them the f-string emits the literal text
    # "predicted_word" instead of the value of the argument.
    return sentence.replace("____", f"**{predicted_word}**")
|
24 |
|
25 |
# ---------------------------------------------------------------------------
# Streamlit page: left column collects a masked sentence, right column shows
# the fill-mask predictions.
# NOTE(review): indentation was reconstructed from a whitespace-stripped diff;
# confirm the nesting of the two columns against the real file.
# ---------------------------------------------------------------------------
st.title("Fill Mask for Multiple Languages | Zabantu-XLM-Roberta")
st.write("This app predicts the missing word for sentences in Zulu, Tshivenda, Sepedi, Tswana, and Tsonga using a Zabantu BERT model.")
st.write("")

col1, col2 = st.columns(2)

# Initialise explicitly instead of probing ``locals()`` later: both names stay
# None unless the corresponding step below actually runs.
user_masked_sentence = None
predictions = None

with col1:
    # The text area is pre-filled with the sample sentences, one
    # "'language': 'sentence'," entry per line.
    user_sentence = st.text_area(
        "Enter your own sentence with a masked word (use '____'):",
        "\n".join(
            f"'{lang}': '{sentence}'," for lang, sentence in sample_sentences.items()
        ),
    )

    if st.button("Submit"):
        # Swap the user-facing placeholder for the tokenizer's own mask token.
        user_masked_sentence = user_sentence.replace('____', unmasker.tokenizer.mask_token)

with col2:
    if user_masked_sentence:
        user_predictions = unmasker(user_masked_sentence)

        if user_predictions:
            st.write("### Predictions for Sample Sentences:")
            predictions = fill_mask_for_languages(sample_sentences)
            st.write(f"{predictions}")

    if predictions:
        for language, language_predictions in predictions.items():
            # Previously this lookup was commented out while the name was
            # still used below, raising NameError on the first iteration.
            # Assumes ``predictions`` is keyed by the same language names as
            # ``sample_sentences`` -- TODO confirm against
            # fill_mask_for_languages.
            original_sentence = sample_sentences[language]
            predicted_sentence = replace_mask(original_sentence, language_predictions[0]['token_str'])
            st.write(f"{language}: {predicted_sentence}\n")
|
61 |
+
|
62 |
|
63 |
css = """
|
64 |
<style>
|
65 |
+
footer {display:none !important;}
|
66 |
+
|
67 |
+
.gr-button-primary {
|
68 |
+
z-index: 14;
|
69 |
+
height: 43px;
|
70 |
+
width: 130px;
|
71 |
+
left: 0px;
|
72 |
+
top: 0px;
|
73 |
+
padding: 0px;
|
74 |
+
cursor: pointer !important;
|
75 |
+
background: none rgb(17, 20, 45) !important;
|
76 |
+
border: none !important;
|
77 |
+
text-align: center !important;
|
78 |
+
font-family: Poppins !important;
|
79 |
+
font-size: 14px !important;
|
80 |
+
font-weight: 500 !important;
|
81 |
+
color: rgb(255, 255, 255) !important;
|
82 |
+
line-height: 1 !important;
|
83 |
+
border-radius: 12px !important;
|
84 |
+
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
|
85 |
+
box-shadow: none !important;
|
86 |
}
|
87 |
+
.gr-button-primary:hover{
|
88 |
+
z-index: 14;
|
89 |
+
height: 43px;
|
90 |
+
width: 130px;
|
91 |
+
left: 0px;
|
92 |
+
top: 0px;
|
93 |
+
padding: 0px;
|
94 |
+
cursor: pointer !important;
|
95 |
+
background: none rgb(66, 133, 244) !important;
|
96 |
+
border: none !important;
|
97 |
+
text-align: center !important;
|
98 |
+
font-family: Poppins !important;
|
99 |
+
font-size: 14px !important;
|
100 |
+
font-weight: 500 !important;
|
101 |
+
color: rgb(255, 255, 255) !important;
|
102 |
+
line-height: 1 !important;
|
103 |
+
border-radius: 12px !important;
|
104 |
+
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
|
105 |
+
box-shadow: rgb(0 0 0 / 23%) 0px 1px 7px 0px !important;
|
106 |
}
|
107 |
+
.hover\:bg-orange-50:hover {
|
108 |
+
--tw-bg-opacity: 1 !important;
|
109 |
+
background-color: rgb(229,225,255) !important;
|
|
|
|
|
|
|
|
|
110 |
}
|
111 |
+
.to-orange-200 {
|
112 |
+
--tw-gradient-to: rgb(37 56 133 / 37%) !important;
|
|
|
|
|
|
|
113 |
}
|
114 |
+
.from-orange-400 {
|
115 |
+
--tw-gradient-from: rgb(17, 20, 45) !important;
|
116 |
+
--tw-gradient-to: rgb(255 150 51 / 0);
|
117 |
+
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to) !important;
|
118 |
}
|
119 |
+
.group-hover\:from-orange-500{
|
120 |
+
--tw-gradient-from:rgb(17, 20, 45) !important;
|
121 |
+
--tw-gradient-to: rgb(37 56 133 / 37%);
|
122 |
+
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to) !important;
|
123 |
+
}
|
124 |
+
.group:hover .group-hover\:text-orange-500{
|
125 |
+
--tw-text-opacity: 1 !important;
|
126 |
+
color:rgb(37 56 133 / var(--tw-text-opacity)) !important;
|
127 |
+
}
|
128 |
+
.container {
|
129 |
+
display: flex;
|
130 |
+
justify-content: space-between;
|
131 |
+
align-items: center;
|
132 |
+
margin-bottom: 5px;
|
133 |
+
width: 100%;
|
134 |
+
}
|
135 |
+
.bar {
|
136 |
+
width: 70%;
|
137 |
+
background-color: #e6e6e6;
|
138 |
+
border-radius: 12px;
|
139 |
+
overflow: hidden;
|
140 |
+
margin-right: 10px;
|
141 |
+
height: 5px;
|
142 |
+
}
|
143 |
+
.bar-fill {
|
144 |
+
background-color: #17152e;
|
145 |
+
height: 100%;
|
146 |
+
border-radius: 12px;
|
147 |
}
|
148 |
</style>
|
149 |
"""
|