ayush2607 committed on
Commit
5104349
·
verified ·
1 Parent(s): 84fda9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -3
app.py CHANGED
@@ -15,13 +15,27 @@ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
15
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
16
  speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def text_to_speech(text):
19
  inputs = processor(text=text, return_tensors="pt")
20
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
21
-
22
  output_path = "output.wav"
23
  sf.write(output_path, speech.numpy(), samplerate=16000)
24
-
25
  return output_path
26
 
27
  # Create Gradio interface
@@ -30,7 +44,7 @@ iface = gr.Interface(
30
  inputs=gr.Textbox(label="Enter text to convert to speech"),
31
  outputs=gr.Audio(label="Generated Speech"),
32
  title="Text-to-Speech Converter",
33
- description="Convert text to speech using the SpeechT5 model."
34
  )
35
 
36
  # Launch the app
 
15
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
16
  speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
17
 
18
+ # Quantize the models
19
+ def quantize_model(model):
20
+ quantized_model = torch.quantization.quantize_dynamic(
21
+ model, {torch.nn.Linear}, dtype=torch.qint8
22
+ )
23
+ return quantized_model
24
+
25
+ model = quantize_model(model)
26
+ vocoder = quantize_model(vocoder)
27
+
28
+ # JIT compile the models for faster inference
29
+ model = torch.jit.script(model)
30
+ vocoder = torch.jit.script(vocoder)
31
+
32
+ # Use inference mode for faster computation
33
+ @torch.inference_mode()
34
  def text_to_speech(text):
35
  inputs = processor(text=text, return_tensors="pt")
36
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
 
37
  output_path = "output.wav"
38
  sf.write(output_path, speech.numpy(), samplerate=16000)
 
39
  return output_path
40
 
41
  # Create Gradio interface
 
44
  inputs=gr.Textbox(label="Enter text to convert to speech"),
45
  outputs=gr.Audio(label="Generated Speech"),
46
  title="Text-to-Speech Converter",
47
+ description="Convert text to speech using the optimized SpeechT5 model."
48
  )
49
 
50
  # Launch the app