Spaces:

amir22010
/

MarketMate

Sleeping

amir22010 committed on Oct 19, 2024

Commit

d5c8eb9

verified ·

1 Parent(s): c7fc8ee

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,20 +8,23 @@ import wave
 #tts
 import tempfile
 import torchaudio
-from speechbrain.inference.TTS import FastSpeech2
 from speechbrain.inference.vocoders import HIFIGAN
-fastspeech2 = FastSpeech2.from_hparams(source="speechbrain/tts-fastspeech2-ljspeech", savedir="pretrained_models/tts-fastspeech2-ljspeech")
 hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="pretrained_models/tts-hifigan-ljspeech")
 def text_to_speech(text):
     with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
-        mel_output, durations, pitch, energy = fastspeech2.encode_text(
-          [text],
-          pace=1.0,        # scale up/down the speed
-          pitch_rate=1.0,  # scale up/down the pitch
-          energy_rate=1.0, # scale up/down the energy
-        )
         # Running Vocoder (spectrogram-to-waveform)
         waveforms = hifi_gan.decode_batch(mel_output)
         # Save the waverform

 #tts
 import tempfile
 import torchaudio
+#from speechbrain.inference.TTS import FastSpeech2
+from speechbrain.inference.TTS import Tacotron2
 from speechbrain.inference.vocoders import HIFIGAN
+#fastspeech2 = FastSpeech2.from_hparams(source="speechbrain/tts-fastspeech2-ljspeech", savedir="pretrained_models/tts-fastspeech2-ljspeech")
+tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
 hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="pretrained_models/tts-hifigan-ljspeech")
 def text_to_speech(text):
     with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
+        # mel_output, durations, pitch, energy = fastspeech2.encode_text(
+        #   [text],
+        #   pace=1.0,        # scale up/down the speed
+        #   pitch_rate=1.0,  # scale up/down the pitch
+        #   energy_rate=1.0, # scale up/down the energy
+        # )
+        mel_output, mel_length, alignment = tacotron2.encode_text(text)
         # Running Vocoder (spectrogram-to-waveform)
         waveforms = hifi_gan.decode_batch(mel_output)
         # Save the waverform