Omarrran committed on
Commit
0b8958e
·
verified ·
1 Parent(s): d7e5954

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +277 -143
app.py CHANGED
@@ -1,17 +1,19 @@
1
  """
2
- TTS Dataset Collection Tool with Font Support and Enhanced Error Handling
3
  """
4
 
5
  import os
6
  import json
7
  import nltk
8
  import gradio as gr
 
9
  from datetime import datetime
10
  from pathlib import Path
11
- import shutil
12
  import logging
13
- from typing import Dict, List, Tuple, Optional
14
  import traceback
 
 
15
 
16
  # Download NLTK data during initialization
17
  try:
@@ -43,12 +45,12 @@ logger = logging.getLogger(__name__)
43
  FONT_STYLES = {
44
  "english_serif": {
45
  "name": "Times New Roman",
46
- "family": "serif",
47
  "css": "font-family: 'Times New Roman', serif;"
48
  },
49
  "english_sans": {
50
  "name": "Arial",
51
- "family": "sans-serif",
52
  "css": "font-family: Arial, sans-serif;"
53
  },
54
  "nastaliq": {
@@ -66,80 +68,82 @@ FONT_STYLES = {
66
 
67
  class TTSDatasetCollector:
68
  """Manages TTS dataset collection and organization with enhanced features"""
69
-
70
  def __init__(self):
71
  """Initialize the TTS Dataset Collector"""
72
  self.root_path = Path(os.path.dirname(os.path.abspath(__file__))) / "dataset"
 
73
  self.sentences = []
74
  self.current_index = 0
75
  self.current_font = "english_serif"
 
76
  self.setup_directories()
77
-
78
  # Ensure NLTK data is downloaded
79
  try:
80
  nltk.data.find('tokenizers/punkt')
81
  except LookupError:
82
  nltk.download('punkt', quiet=True)
83
-
84
  logger.info("TTS Dataset Collector initialized")
85
-
86
  def setup_directories(self) -> None:
87
  """Create necessary directory structure with logging"""
88
  try:
89
  # Create main dataset directory
90
- self.root_path.mkdir(exist_ok=True)
91
-
92
  # Create subdirectories
93
  for subdir in ['audio', 'transcriptions', 'metadata', 'fonts']:
94
- (self.root_path / subdir).mkdir(exist_ok=True)
95
-
96
  # Initialize log file
97
  log_file = self.root_path / 'dataset_log.txt'
98
  if not log_file.exists():
99
  with open(log_file, 'w', encoding='utf-8') as f:
100
  f.write(f"Dataset collection initialized on {datetime.now().isoformat()}\n")
101
-
102
  logger.info("Directory structure created successfully")
103
-
104
  except Exception as e:
105
  logger.error(f"Failed to create directory structure: {str(e)}")
106
  logger.error(traceback.format_exc())
107
  raise RuntimeError("Failed to initialize directory structure")
108
-
109
  def log_operation(self, message: str, level: str = "info") -> None:
110
  """Log operations with timestamp and level"""
111
  try:
112
  log_file = self.root_path / 'dataset_log.txt'
113
  timestamp = datetime.now().isoformat()
114
-
115
  with open(log_file, 'a', encoding='utf-8') as f:
116
  f.write(f"[{timestamp}] [{level.upper()}] {message}\n")
117
-
118
  if level.lower() == "error":
119
  logger.error(message)
120
  else:
121
  logger.info(message)
122
-
123
  except Exception as e:
124
  logger.error(f"Failed to log operation: {str(e)}")
125
-
126
  def process_text(self, text: str) -> Tuple[bool, str]:
127
  """Process pasted or loaded text with error handling"""
128
  try:
129
  if not text.strip():
130
  return False, "Text is empty"
131
-
132
  # Simple sentence splitting as fallback
133
  def simple_split_sentences(text):
134
  # Split on common sentence endings
135
  sentences = []
136
  current = []
137
-
138
  for line in text.split('\n'):
139
  line = line.strip()
140
  if not line:
141
  continue
142
-
143
  # Split on common sentence endings
144
  parts = line.replace('!', '.').replace('?', '.').split('.')
145
  for part in parts:
@@ -148,12 +152,12 @@ class TTSDatasetCollector:
148
  current.append(part)
149
  sentences.append(' '.join(current))
150
  current = []
151
-
152
  if current:
153
  sentences.append(' '.join(current))
154
-
155
  return [s.strip() for s in sentences if s.strip()]
156
-
157
  try:
158
  # Try NLTK first
159
  self.sentences = nltk.sent_tokenize(text.strip())
@@ -161,16 +165,16 @@ class TTSDatasetCollector:
161
  logger.warning(f"NLTK tokenization failed, falling back to simple splitting: {str(e)}")
162
  # Fallback to simple splitting
163
  self.sentences = simple_split_sentences(text.strip())
164
-
165
  if not self.sentences:
166
  return False, "No valid sentences found in text"
167
-
168
  self.current_index = 0
169
-
170
  # Log success
171
  self.log_operation(f"Processed text with {len(self.sentences)} sentences")
172
  return True, f"Successfully loaded {len(self.sentences)} sentences"
173
-
174
  except Exception as e:
175
  error_msg = f"Error processing text: {str(e)}"
176
  self.log_operation(error_msg, "error")
@@ -181,17 +185,17 @@ class TTSDatasetCollector:
181
  """Process and load text file with enhanced error handling"""
182
  if not file:
183
  return False, "No file provided"
184
-
185
  try:
186
  # Validate file extension
187
  if not file.name.endswith('.txt'):
188
  return False, "Only .txt files are supported"
189
-
190
  with open(file.name, 'r', encoding='utf-8') as f:
191
  text = f.read()
192
-
193
  return self.process_text(text)
194
-
195
  except UnicodeDecodeError:
196
  error_msg = "File encoding error. Please ensure the file is UTF-8 encoded"
197
  self.log_operation(error_msg, "error")
@@ -209,55 +213,111 @@ class TTSDatasetCollector:
209
 
210
  def set_font(self, font_style: str) -> Tuple[bool, str]:
211
  """Set the current font style"""
212
- if font_style not in FONT_STYLES:
213
- return False, f"Invalid font style. Available styles: {', '.join(FONT_STYLES.keys())}"
214
  self.current_font = font_style
215
  return True, f"Font style set to {font_style}"
216
-
217
-
218
-
219
- def generate_filenames(self, dataset_name: str, speaker_id: str) -> Tuple[str, str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  """Generate unique filenames for audio and text files"""
 
221
  timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
222
- sentence_id = f"{self.current_index+1:04d}"
223
- base_name = f"{dataset_name}_{speaker_id}_{sentence_id}_{timestamp}"
 
 
 
 
 
 
224
  return f"{base_name}.wav", f"{base_name}.txt"
225
 
226
  def save_recording(self, audio_file, speaker_id: str, dataset_name: str) -> Tuple[bool, str]:
227
  """Save recording with enhanced error handling and logging"""
228
  if not all([audio_file, speaker_id, dataset_name]):
229
  missing = []
230
- if not audio_file: missing.append("audio recording")
231
- if not speaker_id: missing.append("speaker ID")
232
- if not dataset_name: missing.append("dataset name")
 
 
 
233
  return False, f"Missing required information: {', '.join(missing)}"
234
-
 
 
 
 
 
 
235
  try:
236
  # Validate inputs
237
  if not speaker_id.strip().isalnum():
238
  return False, "Speaker ID must contain only letters and numbers"
239
-
240
  if not dataset_name.strip().isalnum():
241
  return False, "Dataset name must contain only letters and numbers"
242
-
 
 
 
243
  # Generate filenames
244
- audio_name, text_name = self.generate_filenames(dataset_name, speaker_id)
245
-
246
  # Create speaker directories
247
  audio_dir = self.root_path / 'audio' / speaker_id
248
  text_dir = self.root_path / 'transcriptions' / speaker_id
249
- audio_dir.mkdir(exist_ok=True)
250
- text_dir.mkdir(exist_ok=True)
251
-
252
  # Save audio file
253
  audio_path = audio_dir / audio_name
254
- shutil.copy2(audio_file, audio_path)
255
-
 
 
 
 
 
256
  # Save transcription
257
  text_path = text_dir / text_name
258
  self.save_transcription(
259
  text_path,
260
- self.sentences[self.current_index],
261
  {
262
  'speaker_id': speaker_id,
263
  'dataset_name': dataset_name,
@@ -266,24 +326,24 @@ class TTSDatasetCollector:
266
  'font_style': self.current_font
267
  }
268
  )
269
-
270
  # Update metadata
271
  self.update_metadata(speaker_id, dataset_name)
272
-
273
  # Log success
274
  self.log_operation(
275
  f"Saved recording: Speaker={speaker_id}, Dataset={dataset_name}, "
276
  f"Audio={audio_name}, Text={text_name}"
277
  )
278
-
279
  return True, f"Recording saved successfully as {audio_name}"
280
-
281
  except Exception as e:
282
  error_msg = f"Error saving recording: {str(e)}"
283
  self.log_operation(error_msg, "error")
284
  logger.error(traceback.format_exc())
285
  return False, error_msg
286
-
287
  def save_transcription(self, file_path: Path, text: str, metadata: Dict) -> None:
288
  """Save transcription with metadata"""
289
  content = f"""[METADATA]
@@ -298,60 +358,64 @@ Font_Style: {metadata['font_style']}
298
  """
299
  with open(file_path, 'w', encoding='utf-8') as f:
300
  f.write(content)
301
-
302
  def update_metadata(self, speaker_id: str, dataset_name: str) -> None:
303
  """Update dataset metadata with error handling"""
304
  metadata_file = self.root_path / 'metadata' / 'dataset_info.json'
305
-
306
  try:
307
  if metadata_file.exists():
308
  with open(metadata_file, 'r') as f:
309
  metadata = json.load(f)
310
  else:
311
  metadata = {'speakers': {}, 'last_updated': None}
312
-
313
  # Update speaker data
314
  if speaker_id not in metadata['speakers']:
315
  metadata['speakers'][speaker_id] = {
316
  'total_recordings': 0,
317
  'datasets': {}
318
  }
319
-
320
  if dataset_name not in metadata['speakers'][speaker_id]['datasets']:
321
  metadata['speakers'][speaker_id]['datasets'][dataset_name] = {
322
  'recordings': 0,
323
  'sentences': len(self.sentences),
 
324
  'first_recording': datetime.now().isoformat(),
325
  'last_recording': None,
326
  'font_styles_used': []
327
  }
328
-
329
  # Update counts and timestamps
330
  metadata['speakers'][speaker_id]['total_recordings'] += 1
331
  metadata['speakers'][speaker_id]['datasets'][dataset_name]['recordings'] += 1
332
  metadata['speakers'][speaker_id]['datasets'][dataset_name]['last_recording'] = \
333
  datetime.now().isoformat()
334
-
 
 
 
 
335
  # Update font styles
336
  if self.current_font not in metadata['speakers'][speaker_id]['datasets'][dataset_name]['font_styles_used']:
337
  metadata['speakers'][speaker_id]['datasets'][dataset_name]['font_styles_used'].append(
338
  self.current_font
339
  )
340
-
341
  metadata['last_updated'] = datetime.now().isoformat()
342
-
343
  # Save updated metadata
344
  with open(metadata_file, 'w') as f:
345
  json.dump(metadata, f, indent=2)
346
-
347
  self.log_operation(f"Updated metadata for {speaker_id} in {dataset_name}")
348
-
349
  except Exception as e:
350
  error_msg = f"Error updating metadata: {str(e)}"
351
  self.log_operation(error_msg, "error")
352
  logger.error(traceback.format_exc())
353
-
354
- # Add these methods to the TTSDatasetCollector class
355
  def get_navigation_info(self) -> Dict[str, Optional[str]]:
356
  """Get current and next sentence information"""
357
  if not self.sentences:
@@ -360,15 +424,15 @@ Font_Style: {metadata['font_style']}
360
  'next': None,
361
  'progress': "No text loaded"
362
  }
363
-
364
  current = self.get_styled_text(self.sentences[self.current_index])
365
  next_text = None
366
-
367
  if self.current_index < len(self.sentences) - 1:
368
  next_text = self.get_styled_text(self.sentences[self.current_index + 1])
369
-
370
  progress = f"Sentence {self.current_index + 1} of {len(self.sentences)}"
371
-
372
  return {
373
  'current': current,
374
  'next': next_text,
@@ -384,15 +448,15 @@ Font_Style: {metadata['font_style']}
384
  'progress': "No text loaded",
385
  'status': "⚠️ Please load a text file first"
386
  }
387
-
388
  if direction == "next" and self.current_index < len(self.sentences) - 1:
389
  self.current_index += 1
390
  elif direction == "prev" and self.current_index > 0:
391
  self.current_index -= 1
392
-
393
  nav_info = self.get_navigation_info()
394
  nav_info['status'] = "✅ Navigation successful"
395
-
396
  return nav_info
397
 
398
  def get_dataset_statistics(self) -> Dict:
@@ -401,18 +465,47 @@ Font_Style: {metadata['font_style']}
401
  metadata_file = self.root_path / 'metadata' / 'dataset_info.json'
402
  if not metadata_file.exists():
403
  return {}
404
-
405
  with open(metadata_file, 'r') as f:
406
- return json.load(f)
 
 
 
 
 
 
 
 
 
 
 
407
  except Exception as e:
408
  logger.error(f"Error reading dataset statistics: {str(e)}")
409
  return {}
410
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411
 
412
- # Then create the interface function outside the class
413
  def create_interface():
414
  """Create Gradio interface with enhanced features"""
415
-
 
 
416
  # Create custom CSS for fonts
417
  custom_css = """
418
  .gradio-container {
@@ -431,22 +524,24 @@ def create_interface():
431
  min-height: 100px !important;
432
  }
433
  """
434
-
435
  # Add font-face declarations
 
436
  for font_style, font_info in FONT_STYLES.items():
437
- if font_style in ['nastaliq', 'naskh']:
438
- custom_css += f"""
 
439
  @font-face {{
440
  font-family: '{font_info["family"]}';
441
- src: url('fonts/{font_info["family"]}.ttf') format('truetype');
442
  }}
443
  """
444
-
445
- collector = TTSDatasetCollector()
446
-
447
  with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
448
  gr.Markdown("# TTS Dataset Collection Tool")
449
-
450
  with gr.Row():
451
  # Left column - Configuration and Input
452
  with gr.Column():
@@ -472,48 +567,54 @@ def create_interface():
472
  value="english_serif",
473
  label="Select Font Style"
474
  )
475
-
 
 
 
 
 
 
476
  # Right column - Recording
477
  with gr.Column():
478
  current_text = gr.HTML(
479
  label="Current Sentence",
480
  elem_classes=["sentence-display"]
481
  )
 
 
 
 
 
 
482
  audio_recorder = gr.Audio(
483
  label="Record Audio",
484
  type="filepath",
485
  elem_classes=["record-button"]
486
  )
487
- next_text = gr.HTML(
488
- label="Next Sentence",
489
- elem_classes=["sentence-display"]
490
- )
491
-
492
- # Controls
493
- with gr.Row():
494
- prev_btn = gr.Button("Previous", variant="secondary")
495
- next_btn = gr.Button("Next", variant="primary")
496
- save_btn = gr.Button("Save Recording", variant="primary", elem_classes=["record-button"])
497
-
498
- # Status and Progress
499
- with gr.Row():
500
- progress = gr.Textbox(
501
- label="Progress",
502
- interactive=False
503
- )
504
  status = gr.Textbox(
505
  label="Status",
506
  interactive=False,
507
  max_lines=3
508
  )
509
-
510
- # Dataset Info
511
  with gr.Row():
512
  dataset_info = gr.JSON(
513
  label="Dataset Statistics",
514
  value={}
515
  )
516
-
 
 
 
 
517
  def process_pasted_text(text):
518
  """Handle pasted text input"""
519
  if not text:
@@ -534,29 +635,30 @@ def create_interface():
534
  status: f"❌ {msg}",
535
  dataset_info: collector.get_dataset_statistics()
536
  }
537
-
538
  nav_info = collector.get_navigation_info()
 
539
  return {
540
  current_text: nav_info['current'],
541
  next_text: nav_info['next'],
542
- progress: nav_info['progress'],
543
  status: f"✅ {msg}",
544
  dataset_info: collector.get_dataset_statistics()
545
  }
546
-
547
  def update_font(font_style):
548
  """Update font and refresh display"""
549
  success, msg = collector.set_font(font_style)
550
  if not success:
551
  return {status: msg}
552
-
553
  nav_info = collector.get_navigation_info()
554
  return {
555
  current_text: nav_info['current'],
556
  next_text: nav_info['next'],
557
  status: f"Font updated to {font_style}"
558
  }
559
-
560
  def load_file(file):
561
  """Handle file loading with enhanced error reporting"""
562
  if not file:
@@ -577,98 +679,130 @@ def create_interface():
577
  status: f"❌ {msg}",
578
  dataset_info: collector.get_dataset_statistics()
579
  }
580
-
581
  nav_info = collector.get_navigation_info()
 
582
  return {
583
  current_text: nav_info['current'],
584
  next_text: nav_info['next'],
585
- progress: nav_info['progress'],
586
  status: f"✅ {msg}",
587
  dataset_info: collector.get_dataset_statistics()
588
  }
589
-
590
  def save_current_recording(audio_file, speaker_id_value, dataset_name_value):
591
  """Handle saving the current recording"""
592
  if not audio_file:
593
- return {status: "⚠️ Please record audio first"}
594
-
 
 
 
 
595
  success, msg = collector.save_recording(
596
  audio_file, speaker_id_value, dataset_name_value
597
  )
598
-
599
  if not success:
600
  return {
601
  status: f"❌ {msg}",
602
- dataset_info: collector.get_dataset_statistics()
 
 
603
  }
604
-
 
 
 
 
605
  # Auto-advance to next sentence after successful save
606
  nav_info = collector.navigate("next")
607
-
608
  return {
609
  current_text: nav_info['current'],
610
  next_text: nav_info['next'],
611
- progress: nav_info['progress'],
612
  status: f"✅ {msg}",
613
- dataset_info: collector.get_dataset_statistics()
 
 
614
  }
615
-
616
  def navigate_sentences(direction):
617
  """Handle navigation between sentences"""
618
  nav_info = collector.navigate(direction)
 
619
  return {
620
  current_text: nav_info['current'],
621
  next_text: nav_info['next'],
622
- progress: nav_info['progress'],
623
  status: nav_info['status']
624
  }
625
-
 
 
 
 
 
 
 
 
 
 
 
 
626
  # Event handlers
627
  text_input.change(
628
  process_pasted_text,
629
  inputs=[text_input],
630
  outputs=[current_text, next_text, progress, status, dataset_info]
631
  )
632
-
633
  file_input.upload(
634
  load_file,
635
  inputs=[file_input],
636
  outputs=[current_text, next_text, progress, status, dataset_info]
637
  )
638
-
639
  font_select.change(
640
  update_font,
641
  inputs=[font_select],
642
  outputs=[current_text, next_text, status]
643
  )
644
-
 
 
 
 
 
 
645
  save_btn.click(
646
  save_current_recording,
647
  inputs=[audio_recorder, speaker_id, dataset_name],
648
- outputs=[current_text, next_text, progress, status, dataset_info]
649
  )
650
-
651
  prev_btn.click(
652
  lambda: navigate_sentences("prev"),
653
  outputs=[current_text, next_text, progress, status]
654
  )
655
-
656
  next_btn.click(
657
  lambda: navigate_sentences("next"),
658
  outputs=[current_text, next_text, progress, status]
659
  )
660
-
661
  # Initialize dataset info
662
  dataset_info.value = collector.get_dataset_statistics()
663
-
664
- return interface
665
-
666
  if __name__ == "__main__":
667
  try:
668
  # Set up any required environment variables
669
  os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
670
  os.environ["GRADIO_SERVER_PORT"] = "7860"
671
-
672
  # Create and launch the interface
673
  interface = create_interface()
674
  interface.queue() # Enable queuing for better handling of concurrent users
 
1
  """
2
+ TTS Dataset Collection Tool with Custom Fonts and Enhanced Features
3
  """
4
 
5
  import os
6
  import json
7
  import nltk
8
  import gradio as gr
9
+ import uuid
10
  from datetime import datetime
11
  from pathlib import Path
 
12
  import logging
13
+ from typing import Dict, Tuple, Optional
14
  import traceback
15
+ import soundfile as sf
16
+ import re
17
 
18
  # Download NLTK data during initialization
19
  try:
 
45
  FONT_STYLES = {
46
  "english_serif": {
47
  "name": "Times New Roman",
48
+ "family": "Times New Roman",
49
  "css": "font-family: 'Times New Roman', serif;"
50
  },
51
  "english_sans": {
52
  "name": "Arial",
53
+ "family": "Arial",
54
  "css": "font-family: Arial, sans-serif;"
55
  },
56
  "nastaliq": {
 
68
 
69
  class TTSDatasetCollector:
70
  """Manages TTS dataset collection and organization with enhanced features"""
71
+
72
    def __init__(self):
        """Initialize the TTS Dataset Collector.

        Creates the on-disk dataset layout under ./dataset next to this
        source file, resets the sentence cursor, and makes sure the NLTK
        'punkt' tokenizer is available for later sentence splitting.
        """
        # Dataset root lives beside this source file.
        self.root_path = Path(os.path.dirname(os.path.abspath(__file__))) / "dataset"
        self.fonts_path = self.root_path / "fonts"
        self.sentences = []  # sentences loaded from pasted or uploaded text
        self.current_index = 0  # cursor into self.sentences
        self.current_font = "english_serif"
        self.custom_fonts = {}  # user-uploaded fonts, keyed by generated family name
        self.setup_directories()

        # Ensure NLTK data is downloaded (quiet: don't spam stdout on startup)
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt', quiet=True)

        logger.info("TTS Dataset Collector initialized")
89
+
90
  def setup_directories(self) -> None:
91
  """Create necessary directory structure with logging"""
92
  try:
93
  # Create main dataset directory
94
+ self.root_path.mkdir(parents=True, exist_ok=True)
95
+
96
  # Create subdirectories
97
  for subdir in ['audio', 'transcriptions', 'metadata', 'fonts']:
98
+ (self.root_path / subdir).mkdir(parents=True, exist_ok=True)
99
+
100
  # Initialize log file
101
  log_file = self.root_path / 'dataset_log.txt'
102
  if not log_file.exists():
103
  with open(log_file, 'w', encoding='utf-8') as f:
104
  f.write(f"Dataset collection initialized on {datetime.now().isoformat()}\n")
105
+
106
  logger.info("Directory structure created successfully")
107
+
108
  except Exception as e:
109
  logger.error(f"Failed to create directory structure: {str(e)}")
110
  logger.error(traceback.format_exc())
111
  raise RuntimeError("Failed to initialize directory structure")
112
+
113
  def log_operation(self, message: str, level: str = "info") -> None:
114
  """Log operations with timestamp and level"""
115
  try:
116
  log_file = self.root_path / 'dataset_log.txt'
117
  timestamp = datetime.now().isoformat()
118
+
119
  with open(log_file, 'a', encoding='utf-8') as f:
120
  f.write(f"[{timestamp}] [{level.upper()}] {message}\n")
121
+
122
  if level.lower() == "error":
123
  logger.error(message)
124
  else:
125
  logger.info(message)
126
+
127
  except Exception as e:
128
  logger.error(f"Failed to log operation: {str(e)}")
129
+
130
  def process_text(self, text: str) -> Tuple[bool, str]:
131
  """Process pasted or loaded text with error handling"""
132
  try:
133
  if not text.strip():
134
  return False, "Text is empty"
135
+
136
  # Simple sentence splitting as fallback
137
  def simple_split_sentences(text):
138
  # Split on common sentence endings
139
  sentences = []
140
  current = []
141
+
142
  for line in text.split('\n'):
143
  line = line.strip()
144
  if not line:
145
  continue
146
+
147
  # Split on common sentence endings
148
  parts = line.replace('!', '.').replace('?', '.').split('.')
149
  for part in parts:
 
152
  current.append(part)
153
  sentences.append(' '.join(current))
154
  current = []
155
+
156
  if current:
157
  sentences.append(' '.join(current))
158
+
159
  return [s.strip() for s in sentences if s.strip()]
160
+
161
  try:
162
  # Try NLTK first
163
  self.sentences = nltk.sent_tokenize(text.strip())
 
165
  logger.warning(f"NLTK tokenization failed, falling back to simple splitting: {str(e)}")
166
  # Fallback to simple splitting
167
  self.sentences = simple_split_sentences(text.strip())
168
+
169
  if not self.sentences:
170
  return False, "No valid sentences found in text"
171
+
172
  self.current_index = 0
173
+
174
  # Log success
175
  self.log_operation(f"Processed text with {len(self.sentences)} sentences")
176
  return True, f"Successfully loaded {len(self.sentences)} sentences"
177
+
178
  except Exception as e:
179
  error_msg = f"Error processing text: {str(e)}"
180
  self.log_operation(error_msg, "error")
 
185
  """Process and load text file with enhanced error handling"""
186
  if not file:
187
  return False, "No file provided"
188
+
189
  try:
190
  # Validate file extension
191
  if not file.name.endswith('.txt'):
192
  return False, "Only .txt files are supported"
193
+
194
  with open(file.name, 'r', encoding='utf-8') as f:
195
  text = f.read()
196
+
197
  return self.process_text(text)
198
+
199
  except UnicodeDecodeError:
200
  error_msg = "File encoding error. Please ensure the file is UTF-8 encoded"
201
  self.log_operation(error_msg, "error")
 
213
 
214
  def set_font(self, font_style: str) -> Tuple[bool, str]:
215
  """Set the current font style"""
216
+ if font_style not in FONT_STYLES and font_style not in self.custom_fonts:
217
+ return False, f"Invalid font style. Available styles: {', '.join(FONT_STYLES.keys()) + ', ' + ', '.join(self.custom_fonts.keys())}"
218
  self.current_font = font_style
219
  return True, f"Font style set to {font_style}"
220
+
221
+ def add_custom_font(self, font_file) -> Tuple[bool, str]:
222
+ """Add a custom font from the uploaded TTF file"""
223
+ try:
224
+ if not font_file.name.endswith('.ttf'):
225
+ return False, "Only .ttf font files are supported"
226
+
227
+ # Generate a unique font family name
228
+ font_family = f"font_{uuid.uuid4().hex[:8]}"
229
+ font_filename = font_family + '.ttf'
230
+ font_dest = self.fonts_path / font_filename
231
+
232
+ # Save the font file
233
+ with open(font_dest, 'wb') as f:
234
+ f.write(font_file.read())
235
+
236
+ # Add to custom fonts
237
+ self.custom_fonts[font_family] = {
238
+ 'name': font_file.name,
239
+ 'family': font_family,
240
+ 'css': f"font-family: '{font_family}', serif;"
241
+ }
242
+
243
+ # Update the FONT_STYLES with the custom font
244
+ FONT_STYLES[font_family] = self.custom_fonts[font_family]
245
+
246
+ # Log success
247
+ self.log_operation(f"Added custom font: {font_file.name} as {font_family}")
248
+ return True, f"Custom font '{font_file.name}' added successfully"
249
+
250
+ except Exception as e:
251
+ error_msg = f"Error adding custom font: {str(e)}"
252
+ self.log_operation(error_msg, "error")
253
+ logger.error(traceback.format_exc())
254
+ return False, error_msg
255
+
256
+ def generate_filenames(self, dataset_name: str, speaker_id: str, sentence_text: str) -> Tuple[str, str]:
257
  """Generate unique filenames for audio and text files"""
258
+ line_number = self.current_index + 1
259
  timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
260
+ # Sanitize strings for filenames
261
+ def sanitize_filename(s):
262
+ return re.sub(r'[^a-zA-Z0-9_-]', '_', s)[:50]
263
+
264
+ dataset_name_safe = sanitize_filename(dataset_name)
265
+ speaker_id_safe = sanitize_filename(speaker_id)
266
+ sentence_excerpt = sanitize_filename(sentence_text[:20])
267
+ base_name = f"{dataset_name_safe}_{speaker_id_safe}_line{line_number}_{sentence_excerpt}_{timestamp}"
268
  return f"{base_name}.wav", f"{base_name}.txt"
269
 
270
  def save_recording(self, audio_file, speaker_id: str, dataset_name: str) -> Tuple[bool, str]:
271
  """Save recording with enhanced error handling and logging"""
272
  if not all([audio_file, speaker_id, dataset_name]):
273
  missing = []
274
+ if not audio_file:
275
+ missing.append("audio recording")
276
+ if not speaker_id:
277
+ missing.append("speaker ID")
278
+ if not dataset_name:
279
+ missing.append("dataset name")
280
  return False, f"Missing required information: {', '.join(missing)}"
281
+
282
+ # Check if sentences have been loaded
283
+ if not self.sentences:
284
+ return False, "No sentences have been loaded. Please load text before saving recordings."
285
+ if self.current_index >= len(self.sentences):
286
+ return False, "Current sentence index is out of range."
287
+
288
  try:
289
  # Validate inputs
290
  if not speaker_id.strip().isalnum():
291
  return False, "Speaker ID must contain only letters and numbers"
 
292
  if not dataset_name.strip().isalnum():
293
  return False, "Dataset name must contain only letters and numbers"
294
+
295
+ # Get current sentence text
296
+ sentence_text = self.sentences[self.current_index]
297
+
298
  # Generate filenames
299
+ audio_name, text_name = self.generate_filenames(dataset_name, speaker_id, sentence_text)
300
+
301
  # Create speaker directories
302
  audio_dir = self.root_path / 'audio' / speaker_id
303
  text_dir = self.root_path / 'transcriptions' / speaker_id
304
+ audio_dir.mkdir(parents=True, exist_ok=True)
305
+ text_dir.mkdir(parents=True, exist_ok=True)
306
+
307
  # Save audio file
308
  audio_path = audio_dir / audio_name
309
+
310
+ # Read the audio file using soundfile
311
+ audio_data, sampling_rate = sf.read(audio_file)
312
+
313
+ # Save audio file
314
+ sf.write(str(audio_path), audio_data, sampling_rate)
315
+
316
  # Save transcription
317
  text_path = text_dir / text_name
318
  self.save_transcription(
319
  text_path,
320
+ sentence_text,
321
  {
322
  'speaker_id': speaker_id,
323
  'dataset_name': dataset_name,
 
326
  'font_style': self.current_font
327
  }
328
  )
329
+
330
  # Update metadata
331
  self.update_metadata(speaker_id, dataset_name)
332
+
333
  # Log success
334
  self.log_operation(
335
  f"Saved recording: Speaker={speaker_id}, Dataset={dataset_name}, "
336
  f"Audio={audio_name}, Text={text_name}"
337
  )
338
+
339
  return True, f"Recording saved successfully as {audio_name}"
340
+
341
  except Exception as e:
342
  error_msg = f"Error saving recording: {str(e)}"
343
  self.log_operation(error_msg, "error")
344
  logger.error(traceback.format_exc())
345
  return False, error_msg
346
+
347
  def save_transcription(self, file_path: Path, text: str, metadata: Dict) -> None:
348
  """Save transcription with metadata"""
349
  content = f"""[METADATA]
 
358
  """
359
  with open(file_path, 'w', encoding='utf-8') as f:
360
  f.write(content)
361
+
362
    def update_metadata(self, speaker_id: str, dataset_name: str) -> None:
        """Update dataset metadata with error handling.

        Maintains metadata/dataset_info.json: per-speaker recording
        totals, per-dataset stats (including which sentence indices have
        been recorded), font styles used, and timestamps. Errors are
        logged but never raised, so a metadata failure cannot undo an
        already-saved recording.
        """
        metadata_file = self.root_path / 'metadata' / 'dataset_info.json'

        try:
            if metadata_file.exists():
                with open(metadata_file, 'r') as f:
                    metadata = json.load(f)
            else:
                # First recording ever: start a fresh metadata document.
                metadata = {'speakers': {}, 'last_updated': None}

            # Update speaker data
            if speaker_id not in metadata['speakers']:
                metadata['speakers'][speaker_id] = {
                    'total_recordings': 0,
                    'datasets': {}
                }

            if dataset_name not in metadata['speakers'][speaker_id]['datasets']:
                # NOTE: 'sentences' snapshots the currently loaded text
                # length at first recording; it is not updated if a
                # different text is loaded later.
                metadata['speakers'][speaker_id]['datasets'][dataset_name] = {
                    'recordings': 0,
                    'sentences': len(self.sentences),
                    'recorded_sentences': [],
                    'first_recording': datetime.now().isoformat(),
                    'last_recording': None,
                    'font_styles_used': []
                }

            # Update counts and timestamps
            metadata['speakers'][speaker_id]['total_recordings'] += 1
            metadata['speakers'][speaker_id]['datasets'][dataset_name]['recordings'] += 1
            metadata['speakers'][speaker_id]['datasets'][dataset_name]['last_recording'] = \
                datetime.now().isoformat()

            # Add current index to recorded sentences (deduplicated)
            if self.current_index not in metadata['speakers'][speaker_id]['datasets'][dataset_name]['recorded_sentences']:
                metadata['speakers'][speaker_id]['datasets'][dataset_name]['recorded_sentences'].append(self.current_index)

            # Update font styles
            if self.current_font not in metadata['speakers'][speaker_id]['datasets'][dataset_name]['font_styles_used']:
                metadata['speakers'][speaker_id]['datasets'][dataset_name]['font_styles_used'].append(
                    self.current_font
                )

            metadata['last_updated'] = datetime.now().isoformat()

            # Save updated metadata
            with open(metadata_file, 'w') as f:
                json.dump(metadata, f, indent=2)

            self.log_operation(f"Updated metadata for {speaker_id} in {dataset_name}")

        except Exception as e:
            error_msg = f"Error updating metadata: {str(e)}"
            self.log_operation(error_msg, "error")
            logger.error(traceback.format_exc())
418
+
 
419
  def get_navigation_info(self) -> Dict[str, Optional[str]]:
420
  """Get current and next sentence information"""
421
  if not self.sentences:
 
424
  'next': None,
425
  'progress': "No text loaded"
426
  }
427
+
428
  current = self.get_styled_text(self.sentences[self.current_index])
429
  next_text = None
430
+
431
  if self.current_index < len(self.sentences) - 1:
432
  next_text = self.get_styled_text(self.sentences[self.current_index + 1])
433
+
434
  progress = f"Sentence {self.current_index + 1} of {len(self.sentences)}"
435
+
436
  return {
437
  'current': current,
438
  'next': next_text,
 
448
  'progress': "No text loaded",
449
  'status': "⚠️ Please load a text file first"
450
  }
451
+
452
  if direction == "next" and self.current_index < len(self.sentences) - 1:
453
  self.current_index += 1
454
  elif direction == "prev" and self.current_index > 0:
455
  self.current_index -= 1
456
+
457
  nav_info = self.get_navigation_info()
458
  nav_info['status'] = "✅ Navigation successful"
459
+
460
  return nav_info
461
 
462
  def get_dataset_statistics(self) -> Dict:
 
465
  metadata_file = self.root_path / 'metadata' / 'dataset_info.json'
466
  if not metadata_file.exists():
467
  return {}
 
468
  with open(metadata_file, 'r') as f:
469
+ metadata = json.load(f)
470
+ # Flatten statistics for display
471
+ total_sentences = len(self.sentences)
472
+ recorded = len(set(metadata['speakers'][list(metadata['speakers'].keys())[0]]['datasets'][list(metadata['speakers'][list(metadata['speakers'].keys())[0]]['datasets'].keys())[0]]['recorded_sentences'])) if metadata['speakers'] else 0
473
+ remaining = total_sentences - recorded
474
+ stats = {
475
+ "Total Sentences": total_sentences,
476
+ "Recorded Sentences": recorded,
477
+ "Remaining Sentences": remaining,
478
+ "Last Updated": metadata.get('last_updated', 'N/A')
479
+ }
480
+ return stats
481
  except Exception as e:
482
  logger.error(f"Error reading dataset statistics: {str(e)}")
483
  return {}
484
 
485
+ def get_last_audio_path(self, speaker_id: str) -> Optional[str]:
486
+ """Get the path to the last saved audio file for downloading"""
487
+ audio_dir = self.root_path / 'audio' / speaker_id
488
+ audio_files = sorted(audio_dir.glob('*.wav'), key=lambda f: f.stat().st_mtime, reverse=True)
489
+ if audio_files:
490
+ return str(audio_files[0])
491
+ else:
492
+ return None
493
+
494
+ def get_last_transcript_path(self, speaker_id: str) -> Optional[str]:
495
+ """Get the path to the last saved transcription file for downloading"""
496
+ text_dir = self.root_path / 'transcriptions' / speaker_id
497
+ text_files = sorted(text_dir.glob('*.txt'), key=lambda f: f.stat().st_mtime, reverse=True)
498
+ if text_files:
499
+ return str(text_files[0])
500
+ else:
501
+ return None
502
+
503
 
 
504
  def create_interface():
505
  """Create Gradio interface with enhanced features"""
506
+
507
+ collector = TTSDatasetCollector()
508
+
509
  # Create custom CSS for fonts
510
  custom_css = """
511
  .gradio-container {
 
524
  min-height: 100px !important;
525
  }
526
  """
527
+
528
  # Add font-face declarations
529
+ font_face_css = ""
530
  for font_style, font_info in FONT_STYLES.items():
531
+ if font_style in ['nastaliq', 'naskh'] or font_style in collector.custom_fonts:
532
+ font_file_name = font_info['family'] + '.ttf' if font_style not in collector.custom_fonts else font_info['family'] + '.ttf'
533
+ font_face_css += f"""
534
  @font-face {{
535
  font-family: '{font_info["family"]}';
536
+ src: url('fonts/{font_file_name}') format('truetype');
537
  }}
538
  """
539
+
540
+ custom_css += font_face_css
541
+
542
  with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
543
  gr.Markdown("# TTS Dataset Collection Tool")
544
+
545
  with gr.Row():
546
  # Left column - Configuration and Input
547
  with gr.Column():
 
567
  value="english_serif",
568
  label="Select Font Style"
569
  )
570
+ # Custom font upload
571
+ font_file_input = gr.File(
572
+ label="Upload Custom Font (.ttf)",
573
+ file_types=[".ttf"]
574
+ )
575
+ add_font_btn = gr.Button("Add Custom Font")
576
+
577
  # Right column - Recording
578
  with gr.Column():
579
  current_text = gr.HTML(
580
  label="Current Sentence",
581
  elem_classes=["sentence-display"]
582
  )
583
+ next_text = gr.HTML(
584
+ label="Next Sentence",
585
+ elem_classes=["sentence-display"]
586
+ )
587
+ progress = gr.Markdown("")
588
+
589
  audio_recorder = gr.Audio(
590
  label="Record Audio",
591
  type="filepath",
592
  elem_classes=["record-button"]
593
  )
594
+ # Controls
595
+ with gr.Row():
596
+ prev_btn = gr.Button("Previous", variant="secondary")
597
+ save_btn = gr.Button("Save Recording", variant="primary", elem_classes=["record-button"])
598
+ next_btn = gr.Button("Next", variant="primary")
599
+
600
+ # Status and Progress
 
 
 
 
 
 
 
 
 
 
601
  status = gr.Textbox(
602
  label="Status",
603
  interactive=False,
604
  max_lines=3
605
  )
606
+
607
+ # Dataset Info and Download Links
608
  with gr.Row():
609
  dataset_info = gr.JSON(
610
  label="Dataset Statistics",
611
  value={}
612
  )
613
+
614
+ with gr.Row():
615
+ download_audio = gr.File(label="Download Audio", interactive=False)
616
+ download_transcript = gr.File(label="Download Transcript", interactive=False)
617
+
618
  def process_pasted_text(text):
619
  """Handle pasted text input"""
620
  if not text:
 
635
  status: f"❌ {msg}",
636
  dataset_info: collector.get_dataset_statistics()
637
  }
638
+
639
  nav_info = collector.get_navigation_info()
640
+ progress_bar = gr.HTML.update(value=f"<progress value='{collector.current_index}' max='{len(collector.sentences)}'></progress>")
641
  return {
642
  current_text: nav_info['current'],
643
  next_text: nav_info['next'],
644
+ progress: progress_bar,
645
  status: f"✅ {msg}",
646
  dataset_info: collector.get_dataset_statistics()
647
  }
648
+
649
  def update_font(font_style):
650
  """Update font and refresh display"""
651
  success, msg = collector.set_font(font_style)
652
  if not success:
653
  return {status: msg}
654
+
655
  nav_info = collector.get_navigation_info()
656
  return {
657
  current_text: nav_info['current'],
658
  next_text: nav_info['next'],
659
  status: f"Font updated to {font_style}"
660
  }
661
+
662
  def load_file(file):
663
  """Handle file loading with enhanced error reporting"""
664
  if not file:
 
679
  status: f"❌ {msg}",
680
  dataset_info: collector.get_dataset_statistics()
681
  }
682
+
683
  nav_info = collector.get_navigation_info()
684
+ progress_bar = gr.HTML.update(value=f"<progress value='{collector.current_index}' max='{len(collector.sentences)}'></progress>")
685
  return {
686
  current_text: nav_info['current'],
687
  next_text: nav_info['next'],
688
+ progress: progress_bar,
689
  status: f"✅ {msg}",
690
  dataset_info: collector.get_dataset_statistics()
691
  }
692
+
693
  def save_current_recording(audio_file, speaker_id_value, dataset_name_value):
694
  """Handle saving the current recording"""
695
  if not audio_file:
696
+ return {
697
+ status: "⚠️ Please record audio first",
698
+ download_audio: None,
699
+ download_transcript: None
700
+ }
701
+
702
  success, msg = collector.save_recording(
703
  audio_file, speaker_id_value, dataset_name_value
704
  )
705
+
706
  if not success:
707
  return {
708
  status: f"❌ {msg}",
709
+ dataset_info: collector.get_dataset_statistics(),
710
+ download_audio: None,
711
+ download_transcript: None
712
  }
713
+
714
+ # Get paths to the saved files
715
+ audio_path = collector.get_last_audio_path(speaker_id_value)
716
+ transcript_path = collector.get_last_transcript_path(speaker_id_value)
717
+
718
  # Auto-advance to next sentence after successful save
719
  nav_info = collector.navigate("next")
720
+ progress_bar = gr.HTML.update(value=f"<progress value='{collector.current_index}' max='{len(collector.sentences)}'></progress>")
721
  return {
722
  current_text: nav_info['current'],
723
  next_text: nav_info['next'],
724
+ progress: progress_bar,
725
  status: f"✅ {msg}",
726
+ dataset_info: collector.get_dataset_statistics(),
727
+ download_audio: audio_path,
728
+ download_transcript: transcript_path
729
  }
730
+
731
  def navigate_sentences(direction):
732
  """Handle navigation between sentences"""
733
  nav_info = collector.navigate(direction)
734
+ progress_bar = gr.HTML.update(value=f"<progress value='{collector.current_index}' max='{len(collector.sentences)}'></progress>")
735
  return {
736
  current_text: nav_info['current'],
737
  next_text: nav_info['next'],
738
+ progress: progress_bar,
739
  status: nav_info['status']
740
  }
741
+
742
+ def add_custom_font(font_file):
743
+ """Handle adding a custom font"""
744
+ if not font_file:
745
+ return {status: "⚠️ No font file selected"}
746
+ success, msg = collector.add_custom_font(font_file)
747
+ if not success:
748
+ return {status: f"❌ {msg}"}
749
+ # Update font dropdown
750
+ font_choices = list(FONT_STYLES.keys())
751
+ font_select.update(choices=font_choices)
752
+ return {status: f"✅ {msg}"}
753
+
754
  # Event handlers
755
  text_input.change(
756
  process_pasted_text,
757
  inputs=[text_input],
758
  outputs=[current_text, next_text, progress, status, dataset_info]
759
  )
760
+
761
  file_input.upload(
762
  load_file,
763
  inputs=[file_input],
764
  outputs=[current_text, next_text, progress, status, dataset_info]
765
  )
766
+
767
  font_select.change(
768
  update_font,
769
  inputs=[font_select],
770
  outputs=[current_text, next_text, status]
771
  )
772
+
773
+ add_font_btn.click(
774
+ add_custom_font,
775
+ inputs=[font_file_input],
776
+ outputs=[status]
777
+ )
778
+
779
  save_btn.click(
780
  save_current_recording,
781
  inputs=[audio_recorder, speaker_id, dataset_name],
782
+ outputs=[current_text, next_text, progress, status, dataset_info, download_audio, download_transcript]
783
  )
784
+
785
  prev_btn.click(
786
  lambda: navigate_sentences("prev"),
787
  outputs=[current_text, next_text, progress, status]
788
  )
789
+
790
  next_btn.click(
791
  lambda: navigate_sentences("next"),
792
  outputs=[current_text, next_text, progress, status]
793
  )
794
+
795
  # Initialize dataset info
796
  dataset_info.value = collector.get_dataset_statistics()
797
+
798
+ return interface
799
+
800
  if __name__ == "__main__":
801
  try:
802
  # Set up any required environment variables
803
  os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
804
  os.environ["GRADIO_SERVER_PORT"] = "7860"
805
+
806
  # Create and launch the interface
807
  interface = create_interface()
808
  interface.queue() # Enable queuing for better handling of concurrent users