Spaces:
Running
Running
File size: 8,128 Bytes
1d08db8 c1e582f ada247c c1e582f 1d08db8 c573a1a 7d9eec3 c573a1a 1d08db8 c573a1a 1d08db8 eeb932b 1d08db8 d36cf56 1d08db8 c573a1a 1d08db8 c062b15 c1e582f 51a408d c1e582f 1d08db8 c062b15 1d08db8 eeb932b 1d08db8 eeb932b 1d08db8 388653c 1d08db8 388653c 1d08db8 e0f6a34 388653c 1d08db8 e0f6a34 1d08db8 e0f6a34 1d08db8 388653c 1d08db8 eeb932b 1d08db8 eeb932b 1d08db8 c1e582f 51a408d c1e582f 51a408d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
import requests
import time
import os
from datetime import datetime
import gradio as gr
from modules.utils.paths import TRANSLATION_OUTPUT_DIR, DEFAULT_PARAMETERS_CONFIG_PATH
from modules.utils.subtitle_manager import *
from modules.utils.files_manager import load_yaml, save_yaml
"""
This module is written with reference to the DeepL API documentation.
For details about the DeepL API, see: https://www.deepl.com/docs-api/documents
"""
# Display name -> DeepL language code sent as the `target_lang` request field.
# Insertion order is preserved, so any listing built from the keys keeps this order.
DEEPL_AVAILABLE_TARGET_LANGS = {
    'Bulgarian': 'BG',
    'Czech': 'CS',
    'Danish': 'DA',
    'German': 'DE',
    'Greek': 'EL',
    'English': 'EN',
    'English (British)': 'EN-GB',
    'English (American)': 'EN-US',
    'Spanish': 'ES',
    'Estonian': 'ET',
    'Finnish': 'FI',
    'French': 'FR',
    'Hungarian': 'HU',
    'Indonesian': 'ID',
    'Italian': 'IT',
    'Japanese': 'JA',
    'Korean': 'KO',
    'Lithuanian': 'LT',
    'Latvian': 'LV',
    'Norwegian (Bokmål)': 'NB',
    'Dutch': 'NL',
    'Polish': 'PL',
    'Portuguese': 'PT',
    'Portuguese (Brazilian)': 'PT-BR',
    'Portuguese (all Portuguese varieties excluding Brazilian Portuguese)': 'PT-PT',
    'Romanian': 'RO',
    'Russian': 'RU',
    'Slovak': 'SK',
    'Slovenian': 'SL',
    'Swedish': 'SV',
    'Turkish': 'TR',
    'Ukrainian': 'UK',
    'Chinese (simplified)': 'ZH',
}
# Display name -> DeepL language code sent as the `source_lang` request field.
# 'Automatic Detection' maps to None, i.e. no explicit source language code.
DEEPL_AVAILABLE_SOURCE_LANGS = {
    'Automatic Detection': None,
    'Bulgarian': 'BG',
    'Czech': 'CS',
    'Danish': 'DA',
    'German': 'DE',
    'Greek': 'EL',
    'English': 'EN',
    'Spanish': 'ES',
    'Estonian': 'ET',
    'Finnish': 'FI',
    'French': 'FR',
    'Hungarian': 'HU',
    'Indonesian': 'ID',
    'Italian': 'IT',
    'Japanese': 'JA',
    'Korean': 'KO',
    'Lithuanian': 'LT',
    'Latvian': 'LV',
    'Norwegian (Bokmål)': 'NB',
    'Dutch': 'NL',
    'Polish': 'PL',
    'Portuguese (all Portuguese varieties mixed)': 'PT',
    'Romanian': 'RO',
    'Russian': 'RU',
    'Slovak': 'SK',
    'Slovenian': 'SL',
    'Swedish': 'SV',
    'Turkish': 'TR',
    'Ukrainian': 'UK',
    'Chinese': 'ZH',
}
class DeepLAPI:
    """Translate subtitle files (.srt / .vtt) through the DeepL HTTP API.

    Attributes
    ----------
    api_interval: int
        Seconds to sleep after each successful API request.
    max_text_batch_size: int
        Maximum number of subtitle sentences sent in a single request.
    available_target_langs: dict
        Mapping of display name -> DeepL target language code.
    available_source_langs: dict
        Mapping of display name -> DeepL source language code.
    output_dir: str
        Directory the translated subtitle files are written to.
    """

    # Seconds to wait for the DeepL server before giving up on a request.
    # Defined on the class so it is available however the instance was built.
    request_timeout = 60

    def __init__(self,
                 output_dir: str = TRANSLATION_OUTPUT_DIR
                 ):
        self.api_interval = 1
        self.max_text_batch_size = 50
        self.available_target_langs = DEEPL_AVAILABLE_TARGET_LANGS
        self.available_source_langs = DEEPL_AVAILABLE_SOURCE_LANGS
        self.output_dir = output_dir

    def translate_deepl(self,
                        auth_key: str,
                        fileobjs: list,
                        source_lang: str,
                        target_lang: str,
                        is_pro: bool = False,
                        add_timestamp: bool = True,
                        progress=gr.Progress()) -> list:
        """
        Translate subtitle files using DeepL API

        Parameters
        ----------
        auth_key: str
            API Key for DeepL from gr.Textbox()
        fileobjs: list
            List of files to translate from gr.Files()
        source_lang: str
            Source language of the file to translate from gr.Dropdown()
        target_lang: str
            Target language of the file to translate from gr.Dropdown()
        is_pro: bool
            Boolean value from gr.Checkbox() that tells whether the key belongs to a pro user.
        add_timestamp: bool
            Boolean value from gr.Checkbox() that determines whether to add a timestamp at the end of the filename.
        progress: gr.Progress
            Indicator to show progress directly in gradio.

        Returns
        ----------
        A List of
        String to return to gr.Textbox()
        Files to return to gr.Files()

        Raises
        ----------
        ValueError
            If a file is neither ``.srt`` nor ``.vtt``, or if a language name is not supported.
        """
        # Treat "no files" (None) the same way as an empty list.
        fileobjs = fileobjs or []
        if fileobjs and isinstance(fileobjs[0], gr.utils.NamedString):
            fileobjs = [fileobj.name for fileobj in fileobjs]

        self.cache_parameters(
            api_key=auth_key,
            is_pro=is_pro,
            source_lang=source_lang,
            target_lang=target_lang,
            add_timestamp=add_timestamp
        )

        files_info = {}
        batch_size = self.max_text_batch_size
        for file_path in fileobjs:
            file_name, file_ext = os.path.splitext(os.path.basename(file_path))
            # Compare case-insensitively, but keep the original extension for the output name.
            ext_key = file_ext.lower()

            if ext_key == ".srt":
                parsed_dicts = parse_srt(file_path=file_path)
            elif ext_key == ".vtt":
                parsed_dicts = parse_vtt(file_path=file_path)
            else:
                # Without this guard `parsed_dicts` would be unbound for the first file,
                # or silently reuse the previous file's content for later files.
                raise ValueError(f"Unsupported subtitle format '{file_ext}' for file {file_path}. "
                                 f"Only .srt and .vtt are supported.")

            total = len(parsed_dicts)
            for batch_start in range(0, total, batch_size):
                batch_end = min(batch_start + batch_size, total)
                sentences_to_translate = [dic["sentence"] for dic in parsed_dicts[batch_start:batch_end]]
                translated_texts = self.request_deepl_translate(auth_key, sentences_to_translate, source_lang,
                                                                target_lang, is_pro)
                # Write each translation back in place, at the position of its source sentence.
                for i, translated_text in enumerate(translated_texts):
                    parsed_dicts[batch_start + i]["sentence"] = translated_text["text"]
                progress(batch_end / total, desc="Translating..")

            if ext_key == ".srt":
                subtitle = get_serialized_srt(parsed_dicts)
            else:
                subtitle = get_serialized_vtt(parsed_dicts)

            if add_timestamp:
                timestamp = datetime.now().strftime("%m%d%H%M%S")
                file_name += f"-{timestamp}"

            output_path = os.path.join(self.output_dir, f"{file_name}{file_ext}")
            write_file(subtitle, output_path)

            files_info[file_name] = {"subtitle": subtitle, "path": output_path}

        total_result = ''.join(
            f'------------------------------------\n{file_name}\n\n{info["subtitle"]}'
            for file_name, info in files_info.items()
        )
        gr_str = f"Done! Subtitle is in the outputs/translation folder.\n\n{total_result}"

        output_file_paths = [info["path"] for info in files_info.values()]
        return [gr_str, output_file_paths]

    def request_deepl_translate(self,
                                auth_key: str,
                                text: list,
                                source_lang: str,
                                target_lang: str,
                                is_pro: bool = False):
        """
        Request API response to DeepL server

        Parameters
        ----------
        auth_key: str
            API Key for DeepL.
        text: list
            Sentences to translate in one request.
        source_lang: str
            Display name of the source language (a key of DEEPL_AVAILABLE_SOURCE_LANGS).
        target_lang: str
            Display name of the target language (a key of DEEPL_AVAILABLE_TARGET_LANGS).
        is_pro: bool
            Use the pro endpoint when True, the free endpoint otherwise.

        Returns
        ----------
        The "translations" entry of the JSON response.

        Raises
        ----------
        ValueError
            If the source or target language name is not supported.
        requests.HTTPError
            If the server answers with an error status code.
        """
        if source_lang not in DEEPL_AVAILABLE_SOURCE_LANGS:
            raise ValueError(f"Source language {source_lang} is not supported. "
                             f"Use one of {list(DEEPL_AVAILABLE_SOURCE_LANGS.keys())}")
        if target_lang not in DEEPL_AVAILABLE_TARGET_LANGS:
            raise ValueError(f"Target language {target_lang} is not supported. "
                             f"Use one of {list(DEEPL_AVAILABLE_TARGET_LANGS.keys())}")

        url = 'https://api.deepl.com/v2/translate' if is_pro else 'https://api-free.deepl.com/v2/translate'

        headers = {
            'Authorization': f'DeepL-Auth-Key {auth_key}'
        }

        data = {
            'text': text,
            'target_lang': DEEPL_AVAILABLE_TARGET_LANGS[target_lang]
        }
        # 'Automatic Detection' maps to None: leave the field out so the server detects the language.
        source_code = DEEPL_AVAILABLE_SOURCE_LANGS[source_lang]
        if source_code is not None:
            data['source_lang'] = source_code

        response = requests.post(url, headers=headers, data=data, timeout=self.request_timeout)
        # Surface HTTP errors directly instead of failing later with an
        # opaque KeyError on the missing "translations" field.
        response.raise_for_status()
        payload = response.json()

        # Pause between requests before the caller sends the next batch.
        time.sleep(self.api_interval)
        return payload["translations"]

    @staticmethod
    def cache_parameters(api_key: str,
                         is_pro: bool,
                         source_lang: str,
                         target_lang: str,
                         add_timestamp: bool):
        """
        Store the given translation parameters in the default parameters config file.

        The config is loaded from DEFAULT_PARAMETERS_CONFIG_PATH, its
        ["translation"]["deepl"] and ["translation"]["add_timestamp"] entries are
        overwritten, and the result is saved back to the same path.
        """
        # NOTE(review): the API key is stored in the config file as given — confirm this is intended.
        cached_params = load_yaml(DEFAULT_PARAMETERS_CONFIG_PATH)
        # Create the "translation" section when the config does not have one yet.
        translation_params = cached_params.get("translation") or {}
        cached_params["translation"] = translation_params
        translation_params["deepl"] = {
            "api_key": api_key,
            "is_pro": is_pro,
            "source_lang": source_lang,
            "target_lang": target_lang
        }
        translation_params["add_timestamp"] = add_timestamp
        save_yaml(cached_params, DEFAULT_PARAMETERS_CONFIG_PATH)
|