Omarrran committed
Commit 14f7424 · verified · 1 Parent(s): 9636675

Update app.py

Files changed (1)
  1. app.py +122 -67
app.py CHANGED
@@ -13,6 +13,25 @@ import logging
 from typing import Dict, List, Tuple, Optional
 import traceback
 
+# Download NLTK data during initialization
+try:
+    nltk.download('punkt', quiet=True)
+except Exception as e:
+    print(f"Warning: Failed to download NLTK data: {str(e)}")
+    print("Downloading from alternative source...")
+    try:
+        import ssl
+        try:
+            _create_unverified_https_context = ssl._create_unverified_context
+        except AttributeError:
+            pass
+        else:
+            ssl._create_default_https_context = _create_unverified_https_context
+        nltk.download('punkt', quiet=True)
+    except Exception as e:
+        print(f"Critical error downloading NLTK data: {str(e)}")
+        raise
+
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
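The hunk above replaces the old `_initialize_nltk` method (removed below) with a module-level download that falls back to an unverified SSL context. A lighter-weight variant, sketched here purely for illustration with NLTK's standard data lookup, would skip the network call when the tokenizer is already cached:

import nltk

# Illustrative sketch only: download punkt once, reusing a cached copy if present.
try:
    nltk.data.find('tokenizers/punkt')   # raises LookupError when punkt is not installed
except LookupError:
    nltk.download('punkt', quiet=True)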
@@ -44,33 +63,19 @@ FONT_STYLES = {
     }
 }
 
+
 class TTSDatasetCollector:
     """Manages TTS dataset collection and organization with enhanced features"""
 
     def __init__(self):
         """Initialize the TTS Dataset Collector"""
-        # Initialize NLTK
-        self._initialize_nltk()
-
-        # Set up paths and directories
         self.root_path = Path(os.path.dirname(os.path.abspath(__file__))) / "dataset"
-        self.sentences: List[str] = []
-        self.current_index: int = 0
-        self.current_font: str = "english_serif"
+        self.sentences = []
+        self.current_index = 0
+        self.current_font = "english_serif"
         self.setup_directories()
-
         logger.info("TTS Dataset Collector initialized")
 
-    def _initialize_nltk(self) -> None:
-        """Initialize NLTK with error handling"""
-        try:
-            nltk.download('punkt', quiet=True)
-            logger.info("NLTK punkt tokenizer downloaded successfully")
-        except Exception as e:
-            logger.error(f"Failed to download NLTK data: {str(e)}")
-            logger.error(traceback.format_exc())
-            raise RuntimeError("Failed to initialize NLTK. Please check your internet connection.")
-
     def setup_directories(self) -> None:
         """Create necessary directory structure with logging"""
         try:
@@ -111,6 +116,29 @@ class TTSDatasetCollector:
         except Exception as e:
             logger.error(f"Failed to log operation: {str(e)}")
 
+    def process_text(self, text: str) -> Tuple[bool, str]:
+        """Process pasted or loaded text with error handling"""
+        try:
+            if not text.strip():
+                return False, "Text is empty"
+
+            # Tokenize sentences
+            self.sentences = nltk.sent_tokenize(text.strip())
+            if not self.sentences:
+                return False, "No valid sentences found in text"
+
+            self.current_index = 0
+
+            # Log success
+            self.log_operation(f"Processed text with {len(self.sentences)} sentences")
+            return True, f"Successfully loaded {len(self.sentences)} sentences"
+
+        except Exception as e:
+            error_msg = f"Error processing text: {str(e)}"
+            self.log_operation(error_msg, "error")
+            logger.error(traceback.format_exc())
+            return False, error_msg
+
     def load_text_file(self, file) -> Tuple[bool, str]:
         """Process and load text file with enhanced error handling"""
         if not file:
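A minimal usage sketch of the new `process_text` helper, with illustrative input and values (assumes the punkt tokenizer downloaded at import time is available):

collector = TTSDatasetCollector()
ok, msg = collector.process_text("Hello world. This is a test.")
# ok  -> True
# msg -> "Successfully loaded 2 sentences"
# collector.sentences     -> ['Hello world.', 'This is a test.']
# collector.current_index -> 0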
@@ -124,23 +152,7 @@
             with open(file.name, 'r', encoding='utf-8') as f:
                 text = f.read()
 
-            # Validate text content
-            if not text.strip():
-                return False, "File is empty"
-
-            # Tokenize sentences
-            self.sentences = nltk.sent_tokenize(text)
-            if not self.sentences:
-                return False, "No valid sentences found in file"
-
-            self.current_index = 0
-
-            # Log success
-            self.log_operation(
-                f"Loaded text file: {file.name} with {len(self.sentences)} sentences"
-            )
-
-            return True, f"Successfully loaded {len(self.sentences)} sentences"
+            return self.process_text(text)
 
         except UnicodeDecodeError:
             error_msg = "File encoding error. Please ensure the file is UTF-8 encoded"
@@ -157,20 +169,21 @@
         font_css = FONT_STYLES[self.current_font]['css']
         return f"<div style='{font_css}'>{text}</div>"
 
-
-    def generate_filenames(self, dataset_name: str, speaker_id: str) -> Tuple[str, str]:
-        """Generate unique filenames for audio and text files"""
-        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-        sentence_id = f"{self.current_index+1:04d}"
-        base_name = f"{dataset_name}_{speaker_id}_{sentence_id}_{timestamp}"
-        return f"{base_name}.wav", f"{base_name}.txt"
-
     def set_font(self, font_style: str) -> Tuple[bool, str]:
         """Set the current font style"""
         if font_style not in FONT_STYLES:
             return False, f"Invalid font style. Available styles: {', '.join(FONT_STYLES.keys())}"
         self.current_font = font_style
         return True, f"Font style set to {font_style}"
+
+
+
+    def generate_filenames(self, dataset_name: str, speaker_id: str) -> Tuple[str, str]:
+        """Generate unique filenames for audio and text files"""
+        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+        sentence_id = f"{self.current_index+1:04d}"
+        base_name = f"{dataset_name}_{speaker_id}_{sentence_id}_{timestamp}"
+        return f"{base_name}.wav", f"{base_name}.txt"
 
     def save_recording(self, audio_file, speaker_id: str, dataset_name: str) -> Tuple[bool, str]:
         """Save recording with enhanced error handling and logging"""
@@ -299,29 +312,29 @@ Font_Style: {metadata['font_style']}
             error_msg = f"Error updating metadata: {str(e)}"
             self.log_operation(error_msg, "error")
             logger.error(traceback.format_exc())
-
-    def get_navigation_info(self) -> Dict[str, Optional[str]]:
-        """Get current and next sentence information"""
-        if not self.sentences:
-            return {
-                'current': None,
-                'next': None,
-                'progress': "No text loaded"
-            }
-
-        current = self.get_styled_text(self.sentences[self.current_index])
-        next_text = None
-
-        if self.current_index < len(self.sentences) - 1:
-            next_text = self.get_styled_text(self.sentences[self.current_index + 1])
-
-        progress = f"Sentence {self.current_index + 1} of {len(self.sentences)}"
-
-        return {
-            'current': current,
-            'next': next_text,
-            'progress': progress
-        }
+
+    def get_navigation_info(self) -> Dict[str, Optional[str]]:
+        """Get current and next sentence information"""
+        if not self.sentences:
+            return {
+                'current': None,
+                'next': None,
+                'progress': "No text loaded"
+            }
+
+        current = self.get_styled_text(self.sentences[self.current_index])
+        next_text = None
+
+        if self.current_index < len(self.sentences) - 1:
+            next_text = self.get_styled_text(self.sentences[self.current_index + 1])
+
+        progress = f"Sentence {self.current_index + 1} of {len(self.sentences)}"
+
+        return {
+            'current': current,
+            'next': next_text,
+            'progress': progress
+        }
 
     def navigate(self, direction: str) -> Dict[str, Optional[str]]:
         """Navigate through sentences"""
@@ -390,14 +403,20 @@ def create_interface():
 
     collector = TTSDatasetCollector()
 
+
     with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
         gr.Markdown("# TTS Dataset Collection Tool")
 
         with gr.Row():
-            # Left column - Configuration
+            # Left column - Configuration and Input
            with gr.Column():
+                text_input = gr.Textbox(
+                    label="Paste Text",
+                    placeholder="Paste your text here...",
+                    lines=5
+                )
                 file_input = gr.File(
-                    label="Upload Text File (.txt)",
+                    label="Or Upload Text File (.txt)",
                     file_types=[".txt"]
                 )
                 speaker_id = gr.Textbox(
@@ -455,6 +474,36 @@ def create_interface():
                 value={}
             )
 
+        def process_pasted_text(text):
+            """Handle pasted text input"""
+            if not text:
+                return {
+                    current_text: "",
+                    next_text: "",
+                    progress: "",
+                    status: "⚠️ No text provided",
+                    dataset_info: collector.get_dataset_statistics()
+                }
+
+            success, msg = collector.process_text(text)
+            if not success:
+                return {
+                    current_text: "",
+                    next_text: "",
+                    progress: "",
+                    status: f"❌ {msg}",
+                    dataset_info: collector.get_dataset_statistics()
+                }
+
+            nav_info = collector.get_navigation_info()
+            return {
+                current_text: nav_info['current'],
+                next_text: nav_info['next'],
+                progress: nav_info['progress'],
+                status: f"✅ {msg}",
+                dataset_info: collector.get_dataset_statistics()
+            }
+
         def update_font(font_style):
             """Update font and refresh display"""
             success, msg = collector.set_font(font_style)
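`process_pasted_text` returns a dict keyed by Gradio components, which requires every keyed component to appear in the handler's `outputs` list (wired up in the next hunk). An equivalent tuple-style return for the success branch, sketched under the assumption that the `outputs` order below is kept, would be:

# Sketch: values returned positionally, in the same order as the outputs list.
return (
    nav_info['current'],                  # current_text
    nav_info['next'],                     # next_text
    nav_info['progress'],                 # progress
    f"✅ {msg}",                          # status
    collector.get_dataset_statistics(),   # dataset_info
)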
@@ -535,6 +584,12 @@ def create_interface():
         }
 
         # Event handlers
+        text_input.change(
+            process_pasted_text,
+            inputs=[text_input],
+            outputs=[current_text, next_text, progress, status, dataset_info]
+        )
+
         file_input.upload(
             load_file,
             inputs=[file_input],
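`text_input.change` re-tokenizes on every edit, so the handler runs while the user is still typing. A possible variant using a standard `gr.Button` (the `load_text_btn` name is hypothetical) would defer the work to an explicit click:

# Hypothetical variant: tokenize only when the user clicks, not on every keystroke.
load_text_btn = gr.Button("Load Pasted Text")
load_text_btn.click(
    process_pasted_text,
    inputs=[text_input],
    outputs=[current_text, next_text, progress, status, dataset_info]
)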
@@ -567,7 +622,7 @@ def create_interface():
         dataset_info.value = collector.get_dataset_statistics()
 
     return interface
-
+
 if __name__ == "__main__":
     try:
         # Set up any required environment variables
 