TTS_DATASET_MAKER_2

Sleeping

App Files Files Community

Omarrran committed on Nov 10, 2024

Commit

14f7424

verified ·

1 Parent(s): 9636675

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -67

app.py CHANGED Viewed

@@ -13,6 +13,25 @@ import logging
 from typing import Dict, List, Tuple, Optional
 import traceback
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
@@ -44,33 +63,19 @@ FONT_STYLES = {
     }
 }
 class TTSDatasetCollector:
     """Manages TTS dataset collection and organization with enhanced features"""
     def __init__(self):
         """Initialize the TTS Dataset Collector"""
-        # Initialize NLTK
-        self._initialize_nltk()
-        # Set up paths and directories
         self.root_path = Path(os.path.dirname(os.path.abspath(__file__))) / "dataset"
-        self.sentences: List[str] = []
-        self.current_index: int = 0
-        self.current_font: str = "english_serif"
         self.setup_directories()
         logger.info("TTS Dataset Collector initialized")
-    def _initialize_nltk(self) -> None:
-        """Initialize NLTK with error handling"""
-        try:
-            nltk.download('punkt', quiet=True)
-            logger.info("NLTK punkt tokenizer downloaded successfully")
-        except Exception as e:
-            logger.error(f"Failed to download NLTK data: {str(e)}")
-            logger.error(traceback.format_exc())
-            raise RuntimeError("Failed to initialize NLTK. Please check your internet connection.")
     def setup_directories(self) -> None:
         """Create necessary directory structure with logging"""
         try:
@@ -111,6 +116,29 @@ class TTSDatasetCollector:
         except Exception as e:
             logger.error(f"Failed to log operation: {str(e)}")
     def load_text_file(self, file) -> Tuple[bool, str]:
         """Process and load text file with enhanced error handling"""
         if not file:
@@ -124,23 +152,7 @@ class TTSDatasetCollector:
             with open(file.name, 'r', encoding='utf-8') as f:
                 text = f.read()
-            # Validate text content
-            if not text.strip():
-                return False, "File is empty"
-            # Tokenize sentences
-            self.sentences = nltk.sent_tokenize(text)
-            if not self.sentences:
-                return False, "No valid sentences found in file"
-            self.current_index = 0
-            # Log success
-            self.log_operation(
-                f"Loaded text file: {file.name} with {len(self.sentences)} sentences"
-            )
-            return True, f"Successfully loaded {len(self.sentences)} sentences"
         except UnicodeDecodeError:
             error_msg = "File encoding error. Please ensure the file is UTF-8 encoded"
@@ -157,20 +169,21 @@ class TTSDatasetCollector:
         font_css = FONT_STYLES[self.current_font]['css']
         return f"<div style='{font_css}'>{text}</div>"
-    def generate_filenames(self, dataset_name: str, speaker_id: str) -> Tuple[str, str]:
-        """Generate unique filenames for audio and text files"""
-        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-        sentence_id = f"{self.current_index+1:04d}"
-        base_name = f"{dataset_name}_{speaker_id}_{sentence_id}_{timestamp}"
-        return f"{base_name}.wav", f"{base_name}.txt"
     def set_font(self, font_style: str) -> Tuple[bool, str]:
         """Set the current font style"""
         if font_style not in FONT_STYLES:
             return False, f"Invalid font style. Available styles: {', '.join(FONT_STYLES.keys())}"
         self.current_font = font_style
         return True, f"Font style set to {font_style}"
     def save_recording(self, audio_file, speaker_id: str, dataset_name: str) -> Tuple[bool, str]:
         """Save recording with enhanced error handling and logging"""
@@ -299,29 +312,29 @@ Font_Style: {metadata['font_style']}
             error_msg = f"Error updating metadata: {str(e)}"
             self.log_operation(error_msg, "error")
             logger.error(traceback.format_exc())
-    def get_navigation_info(self) -> Dict[str, Optional[str]]:
-        """Get current and next sentence information"""
-        if not self.sentences:
             return {
-                'current': None,
-                'next': None,
-                'progress': "No text loaded"
             }
-        current = self.get_styled_text(self.sentences[self.current_index])
-        next_text = None
-        if self.current_index < len(self.sentences) - 1:
-            next_text = self.get_styled_text(self.sentences[self.current_index + 1])
-        progress = f"Sentence {self.current_index + 1} of {len(self.sentences)}"
-        return {
-            'current': current,
-            'next': next_text,
-            'progress': progress
-        }
     def navigate(self, direction: str) -> Dict[str, Optional[str]]:
         """Navigate through sentences"""
@@ -390,14 +403,20 @@ def create_interface():
     collector = TTSDatasetCollector()
     with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
         gr.Markdown("# TTS Dataset Collection Tool")
         with gr.Row():
-            # Left column - Configuration
             with gr.Column():
                 file_input = gr.File(
-                    label="Upload Text File (.txt)",
                     file_types=[".txt"]
                 )
                 speaker_id = gr.Textbox(
@@ -455,6 +474,36 @@ def create_interface():
                 value={}
             )
         def update_font(font_style):
             """Update font and refresh display"""
             success, msg = collector.set_font(font_style)
@@ -535,6 +584,12 @@ def create_interface():
             }
         # Event handlers
         file_input.upload(
             load_file,
             inputs=[file_input],
@@ -567,7 +622,7 @@ def create_interface():
         dataset_info.value = collector.get_dataset_statistics()
         return interface
 if __name__ == "__main__":
     try:
         # Set up any required environment variables

 from typing import Dict, List, Tuple, Optional
 import traceback
+# Download NLTK data during initialization.
+# The first attempt uses normal certificate verification. If it raises, one
+# retry is made with verification disabled, and the original HTTPS context
+# factory is restored afterwards.
+try:
+    nltk.download('punkt', quiet=True)
+except Exception as e:
+    print(f"Warning: Failed to download NLTK data: {str(e)}")
+    print("Downloading from alternative source...")
+    try:
+        import ssl
+        # NOTE(security): the retry turns off TLS certificate verification.
+        # Keep a handle on the verifying factory so the swap stays scoped to
+        # this one download instead of affecting every later HTTPS request.
+        _verified_https_context = getattr(ssl, "_create_default_https_context", None)
+        _unverified_https_context = getattr(ssl, "_create_unverified_context", None)
+        if _unverified_https_context is not None:
+            ssl._create_default_https_context = _unverified_https_context
+        try:
+            nltk.download('punkt', quiet=True)
+        finally:
+            # Restore verification whether or not the retry succeeded
+            if _verified_https_context is not None:
+                ssl._create_default_https_context = _verified_https_context
+    except Exception as e:
+        print(f"Critical error downloading NLTK data: {str(e)}")
+        raise
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
     }
 }
 class TTSDatasetCollector:
     """Manages TTS dataset collection and organization with enhanced features"""
     def __init__(self):
         """Initialize the TTS Dataset Collector.

         Points the dataset root at a "dataset" folder beside this source
         file, starts with no sentences loaded, and then calls
         setup_directories().
         """
         # Dataset root: "dataset" directory next to this module
         self.root_path = Path(os.path.dirname(os.path.abspath(__file__))) / "dataset"
+        # Sentences of the currently loaded text (filled by process_text)
+        self.sentences: List[str] = []
+        # Position of the active sentence within self.sentences
+        self.current_index: int = 0
+        # Key into FONT_STYLES used when rendering sentences
+        self.current_font: str = "english_serif"
         self.setup_directories()
         logger.info("TTS Dataset Collector initialized")
     def setup_directories(self) -> None:
         """Create necessary directory structure with logging"""
         try:
         except Exception as e:
             logger.error(f"Failed to log operation: {str(e)}")
+    def process_text(self, text: str) -> Tuple[bool, str]:
+        """Tokenize pasted or loaded text into the active sentence list.
+
+        Args:
+            text: Raw text to split into sentences.
+
+        Returns:
+            (True, message) when at least one sentence was found; the
+            sentence list is replaced and the index reset to 0.
+            (False, message) when the text is blank, yields no sentences,
+            or any exception is raised while processing.
+        """
+        try:
+            cleaned = text.strip()
+            if not cleaned:
+                return False, "Text is empty"
+
+            # Sentence splitting is delegated to NLTK
+            self.sentences = nltk.sent_tokenize(cleaned)
+            if not self.sentences:
+                return False, "No valid sentences found in text"
+
+            # Fresh text always starts at the first sentence
+            self.current_index = 0
+            sentence_count = len(self.sentences)
+            self.log_operation(f"Processed text with {sentence_count} sentences")
+            return True, f"Successfully loaded {sentence_count} sentences"
+        except Exception as exc:
+            error_msg = f"Error processing text: {str(exc)}"
+            self.log_operation(error_msg, "error")
+            logger.error(traceback.format_exc())
+            return False, error_msg
     def load_text_file(self, file) -> Tuple[bool, str]:
         """Process and load text file with enhanced error handling"""
         if not file:
             with open(file.name, 'r', encoding='utf-8') as f:
                 text = f.read()
+            return self.process_text(text)
         except UnicodeDecodeError:
             error_msg = "File encoding error. Please ensure the file is UTF-8 encoded"
         font_css = FONT_STYLES[self.current_font]['css']
         return f"<div style='{font_css}'>{text}</div>"
     def set_font(self, font_style: str) -> Tuple[bool, str]:
         """Select the font style used to render sentences.

         Args:
             font_style: A key of FONT_STYLES.

         Returns:
             (True, confirmation) when the style exists and was applied;
             (False, message listing the available styles) otherwise.
         """
         if font_style in FONT_STYLES:
             self.current_font = font_style
             return True, f"Font style set to {font_style}"
         available = ', '.join(FONT_STYLES.keys())
         return False, f"Invalid font style. Available styles: {available}"
+    def generate_filenames(self, dataset_name: str, speaker_id: str) -> Tuple[str, str]:
+        """Build the audio/text filename pair for the current sentence.
+
+        The shared stem is "<dataset>_<speaker>_<NNNN>_<YYYYmmddHHMMSS>",
+        where NNNN is the 1-based, zero-padded sentence number.
+
+        Returns:
+            (wav_filename, txt_filename) sharing the same stem.
+        """
+        stamp = datetime.now().strftime("%Y%m%d%H%M%S")
+        stem = "{}_{}_{:04d}_{}".format(
+            dataset_name, speaker_id, self.current_index + 1, stamp
+        )
+        return stem + ".wav", stem + ".txt"
     def save_recording(self, audio_file, speaker_id: str, dataset_name: str) -> Tuple[bool, str]:
         """Save recording with enhanced error handling and logging"""
             error_msg = f"Error updating metadata: {str(e)}"
             self.log_operation(error_msg, "error")
             logger.error(traceback.format_exc())
+    def get_navigation_info(self) -> Dict[str, Optional[str]]:
+        """Get current and next sentence information.
+
+        Returns:
+            A dict with three keys:
+            'current'  - styled HTML for the sentence at current_index,
+            'next'     - styled HTML for the following sentence, or None
+                         when the current sentence is the last one,
+            'progress' - a "Sentence i of n" label.
+            When no sentences are loaded, 'current' and 'next' are None and
+            'progress' is "No text loaded".
+        """
+        if not self.sentences:
+            return {
+                'current': None,
+                'next': None,
+                'progress': "No text loaded"
+            }
+        current = self.get_styled_text(self.sentences[self.current_index])
+        next_text = None
+        # Only offer a preview when a following sentence exists
+        if self.current_index < len(self.sentences) - 1:
+            next_text = self.get_styled_text(self.sentences[self.current_index + 1])
+        progress = f"Sentence {self.current_index + 1} of {len(self.sentences)}"
+        return {
+            'current': current,
+            'next': next_text,
+            'progress': progress
+        }
     def navigate(self, direction: str) -> Dict[str, Optional[str]]:
         """Navigate through sentences"""
     collector = TTSDatasetCollector()
     with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
         gr.Markdown("# TTS Dataset Collection Tool")
         with gr.Row():
+            # Left column - Configuration and Input
             with gr.Column():
+                text_input = gr.Textbox(
+                    label="Paste Text",
+                    placeholder="Paste your text here...",
+                    lines=5
+                )
                 file_input = gr.File(
+                    label="Or Upload Text File (.txt)",
                     file_types=[".txt"]
                 )
                 speaker_id = gr.Textbox(
                 value={}
             )
+        def process_pasted_text(text):
+            """Load pasted text into the collector and refresh the display.
+
+            Returns a component-to-value mapping for the current/next
+            sentence panes, the progress label, the status line and the
+            dataset statistics. The sentence panes and progress label are
+            blanked when no text is given or processing fails.
+            """
+            def _cleared(status_message):
+                # Blank sentence display carrying only a status message
+                return {
+                    current_text: "",
+                    next_text: "",
+                    progress: "",
+                    status: status_message,
+                    dataset_info: collector.get_dataset_statistics()
+                }
+
+            if not text:
+                return _cleared("⚠️ No text provided")
+
+            success, msg = collector.process_text(text)
+            if not success:
+                return _cleared(f"❌ {msg}")
+
+            nav_info = collector.get_navigation_info()
+            updates = {
+                current_text: nav_info['current'],
+                next_text: nav_info['next'],
+                progress: nav_info['progress'],
+            }
+            updates[status] = f"✅ {msg}"
+            updates[dataset_info] = collector.get_dataset_statistics()
+            return updates
         def update_font(font_style):
             """Update font and refresh display"""
             success, msg = collector.set_font(font_style)
             }
         # Event handlers
+        text_input.change(
+            process_pasted_text,
+            inputs=[text_input],
+            outputs=[current_text, next_text, progress, status, dataset_info]
+        )
         file_input.upload(
             load_file,
             inputs=[file_input],
         dataset_info.value = collector.get_dataset_statistics()
         return interface
 if __name__ == "__main__":
     try:
         # Set up any required environment variables