Spaces:
Running
Running
jhj0517
committed on
Commit
·
bb48043
1
Parent(s):
4cbac55
add some args and fix timestamp bug
Browse files
modules/insanely_fast_whisper_inference.py
CHANGED
@@ -9,6 +9,8 @@ import gradio as gr
|
|
9 |
from huggingface_hub import hf_hub_download
|
10 |
import whisper
|
11 |
|
|
|
|
|
12 |
from modules.whisper_parameter import *
|
13 |
from modules.whisper_base import WhisperBase
|
14 |
|
@@ -55,14 +57,32 @@ class InsanelyFastWhisperInference(WhisperBase):
|
|
55 |
|
56 |
if params.lang == "Automatic Detection":
|
57 |
params.lang = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
-
progress(0, desc="Transcribing...")
|
60 |
-
segments = self.model(
|
61 |
-
inputs=audio,
|
62 |
-
chunk_length_s=30,
|
63 |
-
batch_size=24,
|
64 |
-
return_timestamps=True,
|
65 |
-
)
|
66 |
segments_result = self.format_result(
|
67 |
transcribed_result=segments,
|
68 |
)
|
@@ -98,7 +118,6 @@ class InsanelyFastWhisperInference(WhisperBase):
|
|
98 |
|
99 |
self.current_compute_type = compute_type
|
100 |
self.current_model_size = model_size
|
101 |
-
|
102 |
self.model = pipeline(
|
103 |
"automatic-speech-recognition",
|
104 |
model=os.path.join(self.model_dir, model_size),
|
@@ -118,8 +137,6 @@ class InsanelyFastWhisperInference(WhisperBase):
|
|
118 |
----------
|
119 |
transcribed_result: dict
|
120 |
Transcription result of the insanely_fast_whisper
|
121 |
-
progress: gr.Progress
|
122 |
-
Indicator to show progress directly in gradio.
|
123 |
|
124 |
Returns
|
125 |
----------
|
@@ -129,6 +146,8 @@ class InsanelyFastWhisperInference(WhisperBase):
|
|
129 |
result = transcribed_result["chunks"]
|
130 |
for item in result:
|
131 |
start, end = item["timestamp"][0], item["timestamp"][1]
|
|
|
|
|
132 |
item["start"] = start
|
133 |
item["end"] = end
|
134 |
return result
|
|
|
9 |
from huggingface_hub import hf_hub_download
|
10 |
import whisper
|
11 |
|
12 |
+
from rich.progress import Progress, TimeElapsedColumn, BarColumn, TextColumn
|
13 |
+
|
14 |
from modules.whisper_parameter import *
|
15 |
from modules.whisper_base import WhisperBase
|
16 |
|
|
|
57 |
|
58 |
if params.lang == "Automatic Detection":
|
59 |
params.lang = None
|
60 |
+
else:
|
61 |
+
language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
|
62 |
+
params.lang = language_code_dict[params.lang]
|
63 |
+
|
64 |
+
progress(0, desc="Transcribing...Progress is not shown in insanely-fast-whisper.")
|
65 |
+
with Progress(
|
66 |
+
TextColumn("[progress.description]{task.description}"),
|
67 |
+
BarColumn(style="yellow1", pulse_style="white"),
|
68 |
+
TimeElapsedColumn(),
|
69 |
+
) as progress:
|
70 |
+
progress.add_task("[yellow]Transcribing...", total=None)
|
71 |
+
|
72 |
+
segments = self.model(
|
73 |
+
inputs=audio,
|
74 |
+
return_timestamps=True,
|
75 |
+
chunk_length_s=30,
|
76 |
+
batch_size=24,
|
77 |
+
generate_kwargs={
|
78 |
+
"language": params.lang,
|
79 |
+
"task": "translate" if params.is_translate and self.current_model_size in self.translatable_models else "transcribe",
|
80 |
+
"no_speech_threshold": params.no_speech_threshold,
|
81 |
+
"temperature": params.temperature,
|
82 |
+
"compression_ratio_threshold": params.compression_ratio_threshold
|
83 |
+
}
|
84 |
+
)
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
segments_result = self.format_result(
|
87 |
transcribed_result=segments,
|
88 |
)
|
|
|
118 |
|
119 |
self.current_compute_type = compute_type
|
120 |
self.current_model_size = model_size
|
|
|
121 |
self.model = pipeline(
|
122 |
"automatic-speech-recognition",
|
123 |
model=os.path.join(self.model_dir, model_size),
|
|
|
137 |
----------
|
138 |
transcribed_result: dict
|
139 |
Transcription result of the insanely_fast_whisper
|
|
|
|
|
140 |
|
141 |
Returns
|
142 |
----------
|
|
|
146 |
result = transcribed_result["chunks"]
|
147 |
for item in result:
|
148 |
start, end = item["timestamp"][0], item["timestamp"][1]
|
149 |
+
if end is None:
|
150 |
+
end = start
|
151 |
item["start"] = start
|
152 |
item["end"] = end
|
153 |
return result
|