Spaces:
Sleeping
Sleeping
import pickle | |
import numpy as np | |
from tqdm import tqdm | |
import edge_tts | |
import ast | |
import asyncio | |
class EdgeTTSGenerator:
    """
    Generate podcast-style audio from a two-speaker transcript using edge-tts.

    The transcript is read from a pickle file, every (speaker, text) segment is
    synthesized with the voice assigned to that speaker, and the resulting
    audio byte strings are concatenated and written to a single output file.
    """

    def __init__(self, transcript_file_path, output_audio_path):
        """
        Initialize the TTS generator with the transcript and output locations.

        Args:
            transcript_file_path (str): Path to the pickle file containing the rewritten transcript.
            output_audio_path (str): Path to save the generated audio file.
        """
        self.transcript_file_path = transcript_file_path
        self.output_audio_path = output_audio_path
        # edge-tts voice names: "Speaker 1" uses the first voice, every other
        # speaker label falls back to the second one (see generate_audio).
        self.speaker1_voice = "en-US-AriaNeural"
        self.speaker2_voice = "en-US-GuyNeural"

    def load_transcript(self):
        """
        Load the rewritten transcript from the pickle file.

        The pickle normally holds the *string representation* of a list of
        (speaker, text) tuples, which is parsed with ``ast.literal_eval``.
        A pickle that already holds a list or tuple is accepted as well and
        returned as a list.

        Returns:
            list: The transcript as a list of (speaker, text) tuples.

        Raises:
            ValueError, SyntaxError: If the unpickled payload is not a valid
                Python literal.
        """
        # SECURITY: pickle.load can execute arbitrary code while unpickling.
        # Only point this class at transcript files produced by a trusted
        # pipeline step, never at user-uploaded data.
        with open(self.transcript_file_path, 'rb') as f:
            payload = pickle.load(f)

        # Already-parsed transcripts would make literal_eval raise, so pass
        # them through instead of failing.
        if isinstance(payload, (list, tuple)):
            return list(payload)
        return ast.literal_eval(payload)

    async def generate_audio_segment(self, text, voice_name):
        """
        Generate audio for a given text using edge-tts.

        Args:
            text (str): Text to be synthesized.
            voice_name (str): The edge-tts voice name to use.

        Returns:
            bytes: The concatenated audio data received from the stream.
        """
        communicator = edge_tts.Communicate(text, voice_name)
        audio_chunks = []
        async for chunk in communicator.stream():
            # Only chunks carrying a "data" entry contain audio; other chunk
            # types (presumably metadata such as word boundaries) are skipped.
            if "data" in chunk:
                audio_chunks.append(chunk["data"])
        # Join once at the end instead of growing a bytes object per chunk.
        return b"".join(audio_chunks)

    def save_audio(self, audio_data):
        """
        Save the combined audio data to the output file.

        Args:
            audio_data (list): List of bytes objects, one per synthesized segment,
                written back-to-back in list order.
        """
        combined_audio = b"".join(audio_data)
        with open(self.output_audio_path, "wb") as f:
            f.write(combined_audio)

    async def generate_audio(self):
        """
        Convert the whole transcript into audio and save it to a file.

        Segments are synthesized one after another, in transcript order.

        Returns:
            str: Path to the saved audio file.
        """
        transcript = self.load_transcript()
        audio_data = []
        for speaker, text in tqdm(transcript, desc="Generating podcast segments", unit="segment"):
            # Any label other than "Speaker 1" is voiced by the second speaker.
            voice = self.speaker1_voice if speaker == "Speaker 1" else self.speaker2_voice
            segment_audio = await self.generate_audio_segment(text, voice)
            audio_data.append(segment_audio)
        self.save_audio(audio_data)
        return self.output_audio_path