jhj0517 committed on
Commit bb48043 · 1 Parent(s): 4cbac55

add some args and fix timestamp bug

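The main change wraps the bare pipeline call in a rich progress bar and forwards the language, task, and decoding options through generate_kwargs. As a rough, standalone sketch of the indeterminate-bar pattern the diff below adds (long_running_transcription is a made-up stand-in, not code from this repository):

from rich.progress import Progress, TimeElapsedColumn, BarColumn, TextColumn
import time

def long_running_transcription():
    # Hypothetical stand-in for a backend call that exposes no progress
    # callback, like the insanely-fast-whisper pipeline invocation.
    time.sleep(3)
    return "done"

with Progress(
    TextColumn("[progress.description]{task.description}"),
    BarColumn(style="yellow1", pulse_style="white"),
    TimeElapsedColumn(),
) as progress:
    # total=None makes the bar pulse instead of tracking a percentage,
    # so the user still sees elapsed time while the model runs.
    progress.add_task("[yellow]Transcribing...", total=None)
    result = long_running_transcription()
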
modules/insanely_fast_whisper_inference.py CHANGED
@@ -9,6 +9,8 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 import whisper
 
+from rich.progress import Progress, TimeElapsedColumn, BarColumn, TextColumn
+
 from modules.whisper_parameter import *
 from modules.whisper_base import WhisperBase
 
@@ -55,14 +57,32 @@ class InsanelyFastWhisperInference(WhisperBase):
 
         if params.lang == "Automatic Detection":
             params.lang = None
+        else:
+            language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
+            params.lang = language_code_dict[params.lang]
+
+        progress(0, desc="Transcribing...Progress is not shown in insanely-fast-whisper.")
+        with Progress(
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(style="yellow1", pulse_style="white"),
+            TimeElapsedColumn(),
+        ) as progress:
+            progress.add_task("[yellow]Transcribing...", total=None)
+
+            segments = self.model(
+                inputs=audio,
+                return_timestamps=True,
+                chunk_length_s=30,
+                batch_size=24,
+                generate_kwargs={
+                    "language": params.lang,
+                    "task": "translate" if params.is_translate and self.current_model_size in self.translatable_models else "transcribe",
+                    "no_speech_threshold": params.no_speech_threshold,
+                    "temperature": params.temperature,
+                    "compression_ratio_threshold": params.compression_ratio_threshold
+                }
+            )
 
-        progress(0, desc="Transcribing...")
-        segments = self.model(
-            inputs=audio,
-            chunk_length_s=30,
-            batch_size=24,
-            return_timestamps=True,
-        )
         segments_result = self.format_result(
             transcribed_result=segments,
         )
@@ -98,7 +118,6 @@ class InsanelyFastWhisperInference(WhisperBase):
 
         self.current_compute_type = compute_type
         self.current_model_size = model_size
-
         self.model = pipeline(
             "automatic-speech-recognition",
             model=os.path.join(self.model_dir, model_size),
@@ -118,8 +137,6 @@ class InsanelyFastWhisperInference(WhisperBase):
         ----------
         transcribed_result: dict
             Transcription result of the insanely_fast_whisper
-        progress: gr.Progress
-            Indicator to show progress directly in gradio.
 
         Returns
         ----------
@@ -129,6 +146,8 @@ class InsanelyFastWhisperInference(WhisperBase):
         result = transcribed_result["chunks"]
         for item in result:
             start, end = item["timestamp"][0], item["timestamp"][1]
+            if end is None:
+                end = start
             item["start"] = start
             item["end"] = end
         return result
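
The timestamp fix guards against chunks whose end timestamp comes back as None, which the Hugging Face ASR pipeline can produce for the final chunk when return_timestamps=True. A minimal sketch of the same guard applied to invented example data (the chunk values below are illustrative, not real pipeline output):

# Shape of transcribed_result["chunks"] from the ASR pipeline; the data
# here is made up for illustration.
chunks = [
    {"timestamp": (0.0, 4.2), "text": " Hello there."},
    {"timestamp": (4.2, None), "text": " Final chunk."},  # end may be None
]

for item in chunks:
    start, end = item["timestamp"]
    if end is None:
        # Fall back to the start time so downstream subtitle formatting
        # never receives a missing end timestamp.
        end = start
    item["start"] = start
    item["end"] = end

print(chunks)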