Spaces:
Sleeping
Sleeping
File size: 7,230 Bytes
425b806 dc6fc47 425b806 d3d5ad6 425b806 dc6fc47 7aac388 dc6fc47 425b806 089ec5a 425b806 089ec5a 425b806 089ec5a 425b806 089ec5a 425b806 491b1ef 425b806 491b1ef 425b806 e6e0497 b893e70 7ab2a7e 7aac388 d3d5ad6 e6e0497 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import json
import pickle
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
def plot_scalar_on_scale(scalar_value, distance_type):
    """Render a scalar as a marker on a horizontal "Close" to "Distant" scale.

    Args:
        scalar_value: The distance to display. Values outside [0, 1] are
            clipped into that range before plotting.
        distance_type: Text used as the title of the plot.

    Returns:
        The matplotlib figure holding the rendered scale.
    """
    # Local import keeps this function self-contained.
    from matplotlib.figure import Figure

    # Ensure the scalar is within the bounds of the scale.
    scalar_value = np.clip(scalar_value, 0.0, 1.0)

    # Build the figure without pyplot: pyplot keeps a reference to every
    # figure it creates until it is explicitly closed, so a long-running
    # server that makes one figure per request would leak them.
    fig = Figure(figsize=(8, 2))
    ax = fig.subplots()

    # Horizontal colour gradient used as the background of the scale.
    gradient = np.linspace(0, 1, 256).reshape(1, -1)
    ax.imshow(gradient, extent=[0, 1, 0, 1], aspect='auto', cmap='viridis_r')

    # Mark the value with a vertical line, a large dot and its number.
    ax.axvline(x=scalar_value, color='white', lw=5)
    ax.plot(scalar_value, 0.5, 'o', color='white', markersize=42)
    ax.text(scalar_value, 0.5, f'{scalar_value:.2f}', color='black', ha='center', va='center', fontsize=14)

    # Labels rotated 90 degrees, placed just outside both ends of the scale.
    ax.text(-0.03, 0.5, 'Close', ha='center', va='center', fontsize=14, rotation=90)
    ax.text(1.03, 0.5, 'Distant', ha='center', va='center', fontsize=14, rotation=270)

    # No ticks, fixed unit-square limits, caller-supplied title.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title(distance_type)

    # Hide all four spines for a cleaner look.
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)

    return fig
def load_json_from_path(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the decoded object."""
    with open(path, "r", encoding="utf8") as json_file:
        return json.load(json_file)
class Measurer:
    """Look up and plot four kinds of distance between two languages.

    Every table is a nested dict ``table[lang_a][lang_b] -> distance``. A pair
    may be stored in only one of the two possible orders, so lookups try both.

    Attributes set by ``__init__``:
        learned_dist_func: loaded from ``lang_1_to_lang_2_to_learned_dist.json``.
        tree_dist_func: loaded from ``lang_1_to_lang_2_to_tree_dist.json``.
        map_dist_func: loaded from ``lang_1_to_lang_2_to_map_dist.json`` and
            divided by the largest value found in it.
        asp_dist_func: ``1 - similarity`` for every pair in ``asp_dict.pkl``.
    """

    def __init__(self):
        """Load all distance tables from files in the working directory."""
        # learned dist
        self.learned_dist_func = load_json_from_path("lang_1_to_lang_2_to_learned_dist.json")

        # tree dist
        self.tree_dist_func = load_json_from_path("lang_1_to_lang_2_to_tree_dist.json")

        # map dist, rescaled so that the largest stored distance becomes 1.0
        self.map_dist_func = load_json_from_path("lang_1_to_lang_2_to_map_dist.json")
        largest_map_dist = 0.0
        for row in self.map_dist_func.values():
            largest_map_dist = max(largest_map_dist, max(row.values(), default=0.0))
        # An empty or all-zero table has nothing to rescale; skipping the
        # division avoids a ZeroDivisionError and leaves the values untouched.
        if largest_map_dist > 0:
            for row in self.map_dist_func.values():
                for lang_2 in row:
                    row[lang_2] = row[lang_2] / largest_map_dist

        # ASP
        # NOTE(review): pickle.load can execute arbitrary code. This is only
        # acceptable while asp_dict.pkl is a trusted file shipped with the app.
        with open("asp_dict.pkl", 'rb') as dictfile:
            asp_sim = pickle.load(dictfile)
        lang_list = list(asp_sim)
        self.asp_dist_func = {}
        for position, lang_1 in enumerate(lang_list):
            similarities = asp_sim[lang_1]
            # Each row of similarities is indexed by the position of the other
            # language in lang_list. The measure is symmetric, so every pair
            # is stored once, under the language that comes first.
            self.asp_dist_func[lang_1] = {
                lang_2: 1 - similarities[index]
                for index, lang_2 in enumerate(lang_list)
                if index > position
            }

    @staticmethod
    def _symmetric_lookup(table, l1, l2):
        """Return the entry for the pair from ``table``, trying both key orders."""
        for first, second in ((l1, l2), (l2, l1)):
            row = table.get(first)
            if row is not None and second in row:
                return row[second]
        raise KeyError(f"no distance stored for the language pair ({l1!r}, {l2!r})")

    @staticmethod
    def _iso_code_from_label(label):
        """Return the code inside the trailing parentheses of ``"Name (code)"``.

        A label without a trailing parenthesised part is returned stripped and
        otherwise unchanged, so a bare code is accepted as well.
        """
        label = label.strip()
        if label.endswith(")") and "(" in label:
            return label[label.rindex("(") + 1:-1]
        return label

    def get_dists(self, l1, l2):
        """Return the stored distances between two language codes.

        Args:
            l1: Key of the first language as used in the distance tables.
            l2: Key of the second language as used in the distance tables.

        Returns:
            A tuple ``(tree_dist, map_dist, asp_dist, learned_dist)``.

        Raises:
            KeyError: If one of the tables has no entry for the pair in
                either order.
        """
        tree_dist = self._symmetric_lookup(self.tree_dist_func, l1, l2)
        map_dist = self._symmetric_lookup(self.map_dist_func, l1, l2)
        asp_dist = self._symmetric_lookup(self.asp_dist_func, l1, l2)
        learned_dist = self._symmetric_lookup(self.learned_dist_func, l1, l2)
        return tree_dist, map_dist, asp_dist, learned_dist

    def measure(self, l1, l2):
        """Plot the four distances between two languages.

        Args:
            l1: Label of the first language in the form ``"Name (code)"``.
            l2: Label of the second language in the same form.

        Returns:
            A tuple of four figures, in the order tree distance, map distance,
            phoneme-set distance and machine-learned distance.

        Raises:
            KeyError: If the labels differ and a table lacks the pair.
        """
        if l1 == l2:
            # Identical selections are shown as distance zero without a lookup.
            dists = (0.0, 0.0, 0.0, 0.0)
        else:
            dists = self.get_dists(self._iso_code_from_label(l1),
                                   self._iso_code_from_label(l2))
        titles = (f"Language Family Tree Distance between {l1} and {l2}",
                  f"Distance on the Globe between {l1} and {l2}",
                  f"Distance between Phoneme-Sets between {l1} and {l2}",
                  f"Machine-Learned Distance between {l1} and {l2}")
        return tuple(plot_scalar_on_scale(dist, title) for dist, title in zip(dists, titles))
# Load all distance tables once at start-up; every request reuses this instance.
m = Measurer()

# Dropdown entries have the form "Full Name (iso)".
iso_to_name = load_json_from_path("iso_to_fullname.json")
text_selection = [f"{full_name} ({iso_code})" for iso_code, full_name in iso_to_name.items()]

# Two language dropdowns in, four distance plots out.
iface = gr.Interface(fn=m.measure,
                     inputs=[gr.Dropdown(text_selection,
                                         type="value",
                                         value='English (eng)',
                                         label="Select the first Language (type on your keyboard to find it quickly)"),
                             gr.Dropdown(text_selection,
                                         type="value",
                                         value='German (deu)',
                                         label="Select the second Language (type on your keyboard to find it quickly)")],
                     outputs=[gr.Plot(label="", show_label=False, format="png", container=True),
                              gr.Plot(label="", show_label=False, format="png", container=True),
                              gr.Plot(label="", show_label=False, format="png", container=True),
                              gr.Plot(label="", show_label=False, format="png", container=True)],
                     description="<br><br> This demo allows you to view the distance between two languages from the ISO 639-3 list according to several distance measurement functions. "
                                 "For more information, check out our paper: https://arxiv.org/abs/2406.06403 and our text-to-speech tool, in which we make use of "
                                 "this technique: https://github.com/DigitalPhonetics/IMS-Toucan <br><br>",
                     fill_width=True)
iface.launch()
|