# NotebookLlamaGroq / generate_audio_edgetts.py
# Author: yasserrmd — "Update generate_audio_edgetts.py" (commit 59ab165, verified)
import pickle
import numpy as np
from tqdm import tqdm
import edge_tts
import ast
import asyncio
class EdgeTTSGenerator:
    """
    Generate podcast-style audio from a two-speaker transcript using edge-tts.

    The transcript file is expected to be a pickle containing the *string
    representation* of a list of ``(speaker, text)`` tuples (it is parsed
    back with ``ast.literal_eval``).
    """

    def __init__(self, transcript_file_path, output_audio_path):
        """
        Initialize the TTS generator.

        Args:
            transcript_file_path (str): Path to the pickled, rewritten transcript.
            output_audio_path (str): Path where the combined audio file is saved.
        """
        self.transcript_file_path = transcript_file_path
        self.output_audio_path = output_audio_path
        # Fixed edge-tts neural voices for the two podcast speakers.
        self.speaker1_voice = "en-US-AriaNeural"
        self.speaker2_voice = "en-US-GuyNeural"

    def load_transcript(self):
        """
        Load the rewritten transcript from the specified file.

        NOTE(security): ``pickle.load`` must only be used on trusted files —
        a malicious pickle can execute arbitrary code on load.

        Returns:
            list: The transcript as a list of (speaker, text) tuples.
        """
        with open(self.transcript_file_path, 'rb') as f:
            # The pickle holds the repr of the list; literal_eval safely
            # converts that string back into Python objects.
            return ast.literal_eval(pickle.load(f))

    async def generate_audio_segment(self, text, voice_name):
        """
        Synthesize one segment of speech with edge-tts.

        Args:
            text (str): Text to be synthesized.
            voice_name (str): edge-tts voice name to use.

        Returns:
            bytes: Raw encoded audio for the segment.
        """
        communicator = edge_tts.Communicate(text, voice_name)
        # Collect chunks in a list and join once — avoids quadratic bytes +=.
        audio_parts = []
        async for chunk in communicator.stream():
            # The stream interleaves audio chunks with metadata events
            # (e.g. WordBoundary); keep only the audio payload.
            if chunk["type"] == "audio":
                audio_parts.append(chunk["data"])
        return b"".join(audio_parts)

    def save_audio(self, audio_data):
        """
        Save the combined audio data to the output file.

        Args:
            audio_data (list): List of bytes objects, one per segment.
        """
        combined_audio = b"".join(audio_data)
        with open(self.output_audio_path, "wb") as f:
            f.write(combined_audio)

    async def generate_audio(self):
        """
        Convert the whole transcript into audio and save it to a file.

        Segments are synthesized sequentially, choosing the voice by speaker
        label ("Speaker 1" gets speaker1_voice; everyone else speaker2_voice).

        Returns:
            str: Path to the saved audio file.
        """
        transcript = self.load_transcript()
        audio_data = []
        for speaker, text in tqdm(transcript, desc="Generating podcast segments", unit="segment"):
            voice = self.speaker1_voice if speaker == "Speaker 1" else self.speaker2_voice
            segment_audio = await self.generate_audio_segment(text, voice)
            audio_data.append(segment_audio)
        self.save_audio(audio_data)
        return self.output_audio_path