Mahiruoshi committed
Commit a04eaaf · verified · 1 parent: ccf8fa3

Update app.py

Files changed (1): app.py (+47, −315)
app.py CHANGED
@@ -33,9 +33,14 @@ from config import config
 import torch
 import commons
 from text import cleaned_text_to_sequence, get_bert
+
+from tools.sentence import extrac, is_japanese, is_chinese, seconds_to_ass_time, extract_text_from_file, remove_annotations,extract_and_convert
+
 from text.cleaner import clean_text
 import utils
 
+from tools.translate import translate
+
 from models import SynthesizerTrn
 from text.symbols import symbols
 import sys
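
The hunk above swaps the in-file helpers for imports from tools.sentence and tools.translate. A minimal usage sketch of the relocated helpers, with call shapes inferred from how this file uses them further down in the diff (the sample file name is illustrative):

# Sketch only: assumes the repo's tools package is importable.
from tools.translate import translate
from tools.sentence import extrac, is_japanese, extract_text_from_file

text = extract_text_from_file("chapter1.txt")   # hypothetical input file
if not is_japanese(text):
    text = translate(text, "jp")                # same call shape as infer() uses: translate(text, "jp")
for sentence in extrac(text):                   # split the cleaned text into sentences
    print(sentence)
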
@@ -66,6 +71,8 @@ webBase = {
 languages = [ "Auto", "ZH", "JP"]
 modelPaths = []
 modes = ['pyopenjtalk-V2.3']
+if torch.cuda.is_available():
+    modes = ['pyopenjtalk-V2.3','fugashi-V2.3']
 sentence_modes = ['sentence','paragraph']
 
 net_g = None
@@ -97,317 +104,35 @@ BandList = {
     "西克菲尔特音乐学院":["晶","未知留","八千代","栞","美帆"]
 }
 
-#翻译
-
-def translate(Sentence: str, to_Language: str = "jp", from_Language: str = ""):
-    """
-    :param Sentence: 待翻译语句
-    :param from_Language: 待翻译语句语言
-    :param to_Language: 目标语言
-    :return: 翻译后语句 出错时返回None
-    常见语言代码:中文 zh 英语 en 日语 jp
-    """
-    appid = "20231117001883321"
-    key = "lMQbvZHeJveDceLof2wf"
-    if appid == "" or key == "":
-        return "请开发者在config.yml中配置app_key与secret_key"
-    url = "https://fanyi-api.baidu.com/api/trans/vip/translate"
-    texts = Sentence.splitlines()
-    outTexts = []
-    for t in texts:
-        if t != "":
-            # 签名计算 参考文档 https://api.fanyi.baidu.com/product/113
-            salt = str(random.randint(1, 100000))
-            signString = appid + t + salt + key
-            hs = hashlib.md5()
-            hs.update(signString.encode("utf-8"))
-            signString = hs.hexdigest()
-            if from_Language == "":
-                from_Language = "auto"
-            headers = {"Content-Type": "application/x-www-form-urlencoded"}
-            payload = {
-                "q": t,
-                "from": from_Language,
-                "to": to_Language,
-                "appid": appid,
-                "salt": salt,
-                "sign": signString,
-            }
-            # 发送请求
-            try:
-                response = requests.post(
-                    url=url, data=payload, headers=headers, timeout=3
-                )
-                response = response.json()
-                if "trans_result" in response.keys():
-                    result = response["trans_result"][0]
-                    if "dst" in result.keys():
-                        dst = result["dst"]
-                        outTexts.append(dst)
-            except Exception:
-                return Sentence
-        else:
-            outTexts.append(t)
-    return "\n".join(outTexts)
-
-#文本清洗工具
-def is_japanese(string):
-    for ch in string:
-        if ord(ch) > 0x3040 and ord(ch) < 0x30FF:
-            return True
-    return False
-
-def is_chinese(string):
-    for ch in string:
-        if '\u4e00' <= ch <= '\u9fff':
-            return True
-    return False
-
-def is_single_language(sentence):
-    # 检查句子是否为单一语言
-    contains_chinese = re.search(r'[\u4e00-\u9fff]', sentence) is not None
-    contains_japanese = re.search(r'[\u3040-\u30ff\u31f0-\u31ff]', sentence) is not None
-    contains_english = re.search(r'[a-zA-Z]', sentence) is not None
-    language_count = sum([contains_chinese, contains_japanese, contains_english])
-    return language_count == 1
-
-def merge_scattered_parts(sentences):
-    """合并零散的部分到相邻的句子中,并确保单一语言性"""
-    merged_sentences = []
-    buffer_sentence = ""
-
-    for sentence in sentences:
-        # 检查是否是单一语言或者太短(可能是标点或单个词)
-        if is_single_language(sentence) and len(sentence) > 1:
-            # 如果缓冲区有内容,先将缓冲区的内容添加到列表
-            if buffer_sentence:
-                merged_sentences.append(buffer_sentence)
-                buffer_sentence = ""
-            merged_sentences.append(sentence)
-        else:
-            # 如果是零散的部分,将其添加到缓冲区
-            buffer_sentence += sentence
-
-    # 确保最后的缓冲区内容被添加
-    if buffer_sentence:
-        merged_sentences.append(buffer_sentence)
-
-    return merged_sentences
-
-def is_only_punctuation(s):
-    """检查字符串是否只包含标点符号"""
-    # 此处列出中文、日文、英文常见标点符号
-    punctuation_pattern = re.compile(r'^[\s。*;,:“”()、!?《》\u3000\.,;:"\'?!()]+$')
-    return punctuation_pattern.match(s) is not None
-
-def split_mixed_language(sentence):
-    # 分割混合语言句子
-    # 逐字符检查,分割不同语言部分
-    sub_sentences = []
-    current_language = None
-    current_part = ""
-
-    for char in sentence:
-        if re.match(r'[\u4e00-\u9fff]', char): # Chinese character
-            if current_language != 'chinese':
-                if current_part:
-                    sub_sentences.append(current_part)
-                current_part = char
-                current_language = 'chinese'
-            else:
-                current_part += char
-        elif re.match(r'[\u3040-\u30ff\u31f0-\u31ff]', char): # Japanese character
-            if current_language != 'japanese':
-                if current_part:
-                    sub_sentences.append(current_part)
-                current_part = char
-                current_language = 'japanese'
-            else:
-                current_part += char
-        elif re.match(r'[a-zA-Z]', char): # English character
-            if current_language != 'english':
-                if current_part:
-                    sub_sentences.append(current_part)
-                current_part = char
-                current_language = 'english'
-            else:
-                current_part += char
-        else:
-            current_part += char # For punctuation and other characters
-
-    if current_part:
-        sub_sentences.append(current_part)
-
-    return sub_sentences
-
-def replace_quotes(text):
-    # 替换中文、日文引号为英文引号
-    text = re.sub(r'[“”‘’『』「」()()]', '"', text)
-    return text
-
-def remove_numeric_annotations(text):
-    # 定义用于匹配数字注释的正则表达式
-    # 包括 “”、【】和〔〕包裹的数字
-    pattern = r'“\d+”|【\d+】|〔\d+〕'
-    # 使用正则表达式替换掉这些注释
-    cleaned_text = re.sub(pattern, '', text)
-    return cleaned_text
-
-def merge_adjacent_japanese(sentences):
-    """合并相邻且都只包含日语的句子"""
-    merged_sentences = []
-    i = 0
-    while i < len(sentences):
-        current_sentence = sentences[i]
-        if i + 1 < len(sentences) and is_japanese(current_sentence) and is_japanese(sentences[i + 1]):
-            # 当前句子和下一句都是日语,合并它们
-            while i + 1 < len(sentences) and is_japanese(sentences[i + 1]):
-                current_sentence += sentences[i + 1]
-                i += 1
-        merged_sentences.append(current_sentence)
-        i += 1
-    return merged_sentences
-
-def extrac(text):
-    text = replace_quotes(remove_numeric_annotations(text)) # 替换引号
-    text = re.sub("<[^>]*>", "", text) # 移除 HTML 标签
-    # 使用换行符和标点符号进行初步分割
-    preliminary_sentences = re.split(r'([\n。;!?\.\?!])', text)
-    final_sentences = []
-
-    preliminary_sentences = re.split(r'([\n。;!?\.\?!])', text)
-
-    for piece in preliminary_sentences:
-        if is_single_language(piece):
-            final_sentences.append(piece)
-        else:
-            sub_sentences = split_mixed_language(piece)
-            final_sentences.extend(sub_sentences)
-
-    # 处理长句子,使用jieba进行分词
-    split_sentences = []
-    for sentence in final_sentences:
-        split_sentences.extend(split_long_sentences(sentence))
-
-    # 合并相邻的日语句子
-    merged_japanese_sentences = merge_adjacent_japanese(split_sentences)
-
-    # 剔除只包含标点符号的元素
-    clean_sentences = [s for s in merged_japanese_sentences if not is_only_punctuation(s)]
-
-    # 移除空字符串并去除多余引号
-    return [s.replace('"','').strip() for s in clean_sentences if s]
-
-
-
-# 移除空字符串
-
-def is_mixed_language(sentence):
-    contains_chinese = re.search(r'[\u4e00-\u9fff]', sentence) is not None
-    contains_japanese = re.search(r'[\u3040-\u30ff\u31f0-\u31ff]', sentence) is not None
-    contains_english = re.search(r'[a-zA-Z]', sentence) is not None
-    languages_count = sum([contains_chinese, contains_japanese, contains_english])
-    return languages_count > 1
-
-def split_mixed_language(sentence):
-    # 分割混合语言句子
-    sub_sentences = re.split(r'(?<=[。!?\.\?!])(?=")|(?<=")(?=[\u4e00-\u9fff\u3040-\u30ff\u31f0-\u31ff]|[a-zA-Z])', sentence)
-    return [s.strip() for s in sub_sentences if s.strip()]
-
-def seconds_to_ass_time(seconds):
-    """将秒数转换为ASS时间格式"""
-    hours = int(seconds / 3600)
-    minutes = int((seconds % 3600) / 60)
-    seconds = int(seconds) % 60
-    milliseconds = int((seconds - int(seconds)) * 1000)
-    return "{:01d}:{:02d}:{:02d}.{:02d}".format(hours, minutes, seconds, int(milliseconds / 10))
-
-def extract_text_from_epub(file_path):
-    book = epub.read_epub(file_path)
-    content = []
-    for item in book.items:
-        if isinstance(item, epub.EpubHtml):
-            soup = BeautifulSoup(item.content, 'html.parser')
-            content.append(soup.get_text())
-    return '\n'.join(content)
-
-def extract_text_from_pdf(file_path):
-    with open(file_path, 'rb') as file:
-        reader = PdfReader(file)
-        content = [page.extract_text() for page in reader.pages]
-    return '\n'.join(content)
-
-def remove_annotations(text):
-    # 移除方括号、尖括号和中文方括号中的内容
-    text = re.sub(r'\[.*?\]', '', text)
-    text = re.sub(r'\<.*?\>', '', text)
-    text = re.sub(r'&#8203;``【oaicite:1】``&#8203;', '', text)
-    return text
-
-def extract_text_from_file(inputFile):
-    file_extension = os.path.splitext(inputFile)[1].lower()
-    if file_extension == ".epub":
-        return extract_text_from_epub(inputFile)
-    elif file_extension == ".pdf":
-        return extract_text_from_pdf(inputFile)
-    elif file_extension == ".txt":
-        with open(inputFile, 'r', encoding='utf-8') as f:
-            return f.read()
-    else:
-        raise ValueError(f"Unsupported file format: {file_extension}")
-
-def split_by_punctuation(sentence):
-    """按照中文次级标点符号分割句子"""
-    # 常见的中文次级分隔符号:逗号、分号等
-    parts = re.split(r'([,,;;])', sentence)
-    # 将标点符号与前面的词语合并,避免单独标点符号成为一个部分
-    merged_parts = []
-    for part in parts:
-        if part and not part in ',,;;':
-            merged_parts.append(part)
-        elif merged_parts:
-            merged_parts[-1] += part
-    return merged_parts
-
-def split_long_sentences(sentence, max_length=30):
-    """如果中文句子太长,先按标点分割,必要时使用jieba进行分词并分割"""
-    if len(sentence) > max_length and is_chinese(sentence):
-        # 首先尝试按照次级标点符号分割
-        preliminary_parts = split_by_punctuation(sentence)
-        new_sentences = []
-
-        for part in preliminary_parts:
-            # 如果部分仍然太长,使用jieba进行分词
-            if len(part) > max_length:
-                words = jieba.lcut(part)
-                current_sentence = ""
-                for word in words:
-                    if len(current_sentence) + len(word) > max_length:
-                        new_sentences.append(current_sentence)
-                        current_sentence = word
-                    else:
-                        current_sentence += word
-                if current_sentence:
-                    new_sentences.append(current_sentence)
-            else:
-                new_sentences.append(part)
-
-        return new_sentences
-    return [sentence] # 如果句子不长或不是中文,直接返回
-
-def extract_and_convert(text):
-
-    # 使用正则表达式找出所有英文单词
-    english_parts = re.findall(r'\b[A-Za-z]+\b', text) # \b为单词边界标识
-
-    # 对每个英文单词进行片假名转换
-    kana_parts = ['\n{}\n'.format(romajitable.to_kana(word).katakana) for word in english_parts]
-
-    # 替换原文本中的英文部分
-    for eng, kana in zip(english_parts, kana_parts):
-        text = text.replace(eng, kana, 1) # 限制每次只替换一个实例
-
-    return text
+# 推理工具
+def download_unidic():
+    try:
+        Tagger()
+        print("Tagger launch successfully.")
+    except Exception as e:
+        print("UNIDIC dictionary not found, downloading...")
+        subprocess.run([sys.executable, "-m", "unidic", "download"])
+        print("Download completed.")
+
+def kanji_to_hiragana(text):
+    global tagger
+    output = ""
+
+    # 更新正则表达式以更准确地区分文本和标点符号
+    segments = re.findall(r'[一-龥ぁ-んァ-ン\w]+|[^\一-龥ぁ-んァ-ン\w\s]', text, re.UNICODE)
+
+    for segment in segments:
+        if re.match(r'[一-龥ぁ-んァ-ン\w]+', segment):
+            # 如果是单词或汉字,转换为平假名
+            for word in tagger(segment):
+                kana = word.feature.kana or word.surface
+                hiragana = jaconv.kata2hira(kana) # 将片假名转换为平假名
+                output += hiragana
+        else:
+            # 如果是标点符号,保持不变
+            output += segment
+
+    return output
 
 def get_net_g(model_path: str, device: str, hps):
     net_g = SynthesizerTrn(
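
The new kanji_to_hiragana() added in this hunk segments the text, reads each word with a module-level fugashi Tagger, and converts the katakana reading to hiragana via jaconv. A self-contained sketch of that conversion step, assuming the UNIDIC dictionary is installed (python -m unidic download, which is what download_unidic() runs); the sample sentence and expected reading are illustrative:

# Sketch only: standalone version of the kana-conversion step above.
import jaconv
from fugashi import Tagger

tagger = Tagger()  # relies on the unidic dictionary being installed

def to_hiragana(text: str) -> str:
    out = ""
    for word in tagger(text):
        kana = word.feature.kana or word.surface  # katakana reading when available
        out += jaconv.kata2hira(kana)             # katakana -> hiragana
    return out

print(to_hiragana("日本語を勉強します"))  # expected roughly "にほんごをべんきょうします"
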
@@ -480,6 +205,8 @@ def infer(
     if style_text == None:
         style_text = ""
         style_weight=0,
+    if mode == 'fugashi-V2.3':
+        text = kanji_to_hiragana(text) if is_japanese(text) else text
     if language == "JP":
         text = translate(text,"jp")
     if language == "ZH":
@@ -668,7 +395,7 @@ def generate_audio(
     if inputFile:
         text = extract_text_from_file(inputFile.name)
         sentence_mode = 'paragraph'
-    if mode == 'pyopenjtalk-V2.3':
+    if mode == 'pyopenjtalk-V2.3' or mode == 'fugashi-V2.3':
         if sentence_mode == 'sentence':
             audio = infer(
                 text,
@@ -751,6 +478,9 @@ def generate_audio(
     return file_path
 
 if __name__ == "__main__":
+    if torch.cuda.is_available():
+        download_unidic()
+        tagger = Tagger()
     for dirpath, dirnames, filenames in os.walk('Data/BangDream/models/'):
         for filename in filenames:
             modelPaths.append(os.path.join(dirpath, filename))
@@ -762,9 +492,11 @@ if __name__ == "__main__":
     speakers = list(speaker_ids.keys())
     with gr.Blocks() as app:
         gr.Markdown(value="""
+[日语特化版(推荐)](https://huggingface.co/spaces/Mahiruoshi/BangStarlight),国内可用连接: https://mahiruoshi-BangStarlight.hf.space/\n
+[假名标注版](https://huggingface.co/spaces/Mahiruoshi/MyGO_VIts-bert),国内可用连接: https://mahiruoshi-MyGO-VIts-bert.hf.space/\n
+该界面的真实链接(国内可用): https://mahiruoshi-bangdream-bert-vits2.hf.space/\n
 ([Bert-Vits2](https://github.com/Stardust-minus/Bert-VITS2) V2.3)少歌邦邦全员在线语音合成\n
 [好玩的](http://love.soyorin.top/)\n
-该界面的真实链接(国内可用): https://mahiruoshi-bangdream-bert-vits2.hf.space/\n
 API: https://mahiruoshi-bert-vits2-api.hf.space/ \n
 调用方式: https://mahiruoshi-bert-vits2-api.hf.space/?text={{speakText}}&speaker=chosen_speaker\n
 推荐搭配[Legado开源阅读](https://github.com/gedoor/legado)或[聊天bot](https://github.com/Paraworks/BangDreamAi)使用\n
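
The Markdown above documents the companion API Space and its query-string call pattern (?text=...&speaker=...). A minimal client sketch of that pattern; treating the response body as audio data is an assumption not confirmed by this diff:

# Sketch only: follows the call pattern documented above.
import requests

params = {"text": "おはようございます", "speaker": "ましろ"}  # speaker name is illustrative
resp = requests.get("https://mahiruoshi-bert-vits2-api.hf.space/", params=params, timeout=60)
resp.raise_for_status()
with open("reply.wav", "wb") as f:  # assumes the endpoint returns audio bytes
    f.write(resp.content)
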
@@ -809,13 +541,13 @@ if __name__ == "__main__":
                 choices=modes, value="pyopenjtalk-V2.3", label="TTS模式,合成少歌角色需要切换成 pyopenjtalk-V2.3-Katakana "
             )
             sentence_mode = gr.Dropdown(
-                choices=sentence_modes, value="paragraph", label="文本合成模式"
+                choices=sentence_modes, value="sentence", label="文本合成模式"
             )
             with gr.Accordion(label="扩展选项", open=False):
                 inputFile = gr.UploadButton(label="txt文件输入")
                 speakerList = gr.TextArea(
                     label="角色对应表,如果你记不住角色名可以这样,左边是你想要在每一句话合成中用到的speaker(见角色清单)右边是你上传文本时分隔符左边设置的说话人:{ChoseSpeakerFromConfigList}|{SeakerInUploadText}",
-                    value = "ましろ|真白\n七深|七深\n透子|透子\nつくし|筑紫\n瑠唯|瑠唯\nそよ|素世\n祥子|祥子",
+                    value = "ましろ|天音\n七深|七深\n透子|透子\nつくし|筑紫\n瑠唯|瑠唯\nそよ|素世\n祥子|祥子",
                 )
                 groupSize = gr.Slider(
                     minimum=10, maximum=1000 if torch.cuda.is_available() else 50,value = 50, step=1, label="单个音频文件包含的最大句子数"
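
The speakerList value above holds one "model speaker|upload-text speaker" pair per line: the left side is the voice used for synthesis, the right side is the name written in the uploaded text. A hypothetical sketch of parsing that format (app.py's actual parsing code is not part of this diff):

# Sketch only: turn the mapping text into {text_speaker: model_speaker}.
mapping_text = "ましろ|天音\n七深|七深\nそよ|素世"
speaker_map = {}
for line in mapping_text.splitlines():
    if "|" in line:
        model_speaker, text_speaker = line.split("|", 1)
        speaker_map[text_speaker.strip()] = model_speaker.strip()

print(speaker_map.get("天音"))  # -> "ましろ"
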
@@ -835,8 +567,8 @@ if __name__ == "__main__":
             text = gr.TextArea(
                 label="文本输入,可用'|'分割说话人和文本,注意换行",
                 info="输入纯日语或者中文",
-                #placeholder=f"{name}|你觉得你是职业歌手吗\n真白|我觉得我是",
-                value=f"{name}|你觉得你是职业歌手吗\n真白|我觉得我是"
+                value=f"{name}|你是职业歌手吗\n天音|我觉得我是",
+                placeholder=f"私は{name}です、あの子はだれ? "
             )
             style_text = gr.Textbox(
                 label="情感辅助文本",
 