Spaces:

jhj0517
/

Whisper-WebUI

Running

App Files Community

jhj0517 committed on May 16, 2024

Commit

46f2826

unverified ·

2 Parent(s): 89ee9c0 8808c7b

Merge pull request #151 from jhj0517/feature/add-params

Browse files

Files changed (4) hide show

app.py +8 -3
modules/faster_whisper_inference.py +2 -1
modules/whisper_Inference.py +2 -1
modules/whisper_parameter.py +7 -1

app.py CHANGED Viewed

@@ -68,6 +68,7 @@ class App:
                         cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
                         tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
                         sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
                     with gr.Row():
                         btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
@@ -87,7 +88,8 @@ class App:
                                                              patience=nb_patience,
                                                              condition_on_previous_text=cb_condition_on_previous_text,
                                                              initial_prompt=tb_initial_prompt,
-                                                             temperature=sd_temperature)
                     btn_run.click(fn=self.whisper_inf.transcribe_file,
                                   inputs=params + whisper_params.to_list(),
                                   outputs=[tb_indicator, files_subtitles])
@@ -124,6 +126,7 @@ class App:
                         cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
                         tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
                         sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
                     with gr.Row():
                         btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
@@ -143,7 +146,8 @@ class App:
                                                              patience=nb_patience,
                                                              condition_on_previous_text=cb_condition_on_previous_text,
                                                              initial_prompt=tb_initial_prompt,
-                                                             temperature=sd_temperature)
                     btn_run.click(fn=self.whisper_inf.transcribe_youtube,
                                   inputs=params + whisper_params.to_list(),
                                   outputs=[tb_indicator, files_subtitles])
@@ -192,7 +196,8 @@ class App:
                                                              patience=nb_patience,
                                                              condition_on_previous_text=cb_condition_on_previous_text,
                                                              initial_prompt=tb_initial_prompt,
-                                                             temperature=sd_temperature)
                     btn_run.click(fn=self.whisper_inf.transcribe_mic,
                                   inputs=params + whisper_params.to_list(),
                                   outputs=[tb_indicator, files_subtitles])

                         cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
                         tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
                         sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
+                        nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
                     with gr.Row():
                         btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
                                                              patience=nb_patience,
                                                              condition_on_previous_text=cb_condition_on_previous_text,
                                                              initial_prompt=tb_initial_prompt,
+                                                             temperature=sd_temperature,
+                                                             compression_ratio_threshold=nb_compression_ratio_threshold)
                     btn_run.click(fn=self.whisper_inf.transcribe_file,
                                   inputs=params + whisper_params.to_list(),
                                   outputs=[tb_indicator, files_subtitles])
                         cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
                         tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
                         sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
+                        nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
                     with gr.Row():
                         btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
                                                              patience=nb_patience,
                                                              condition_on_previous_text=cb_condition_on_previous_text,
                                                              initial_prompt=tb_initial_prompt,
+                                                             temperature=sd_temperature,
+                                                             compression_ratio_threshold=nb_compression_ratio_threshold)
                     btn_run.click(fn=self.whisper_inf.transcribe_youtube,
                                   inputs=params + whisper_params.to_list(),
                                   outputs=[tb_indicator, files_subtitles])
                                                              patience=nb_patience,
                                                              condition_on_previous_text=cb_condition_on_previous_text,
                                                              initial_prompt=tb_initial_prompt,
+                                                             temperature=sd_temperature,
+                                                             compression_ratio_threshold=nb_compression_ratio_threshold)
                     btn_run.click(fn=self.whisper_inf.transcribe_mic,
                                   inputs=params + whisper_params.to_list(),
                                   outputs=[tb_indicator, files_subtitles])

modules/faster_whisper_inference.py CHANGED Viewed

@@ -269,7 +269,8 @@ class FasterWhisperInference(BaseInterface):
             no_speech_threshold=params.no_speech_threshold,
             best_of=params.best_of,
             patience=params.patience,
-            temperature=params.temperature
         )
         progress(0, desc="Loading audio..")

             no_speech_threshold=params.no_speech_threshold,
             best_of=params.best_of,
             patience=params.patience,
+            temperature=params.temperature,
+            compression_ratio_threshold=params.compression_ratio_threshold,
         )
         progress(0, desc="Loading audio..")

modules/whisper_Inference.py CHANGED Viewed

@@ -258,7 +258,8 @@ class WhisperInference(BaseInterface):
                                                 best_of=params.best_of,
                                                 patience=params.patience,
                                                 temperature=params.temperature,
-                                                progress_callback=progress_callback)["segments"]
         elapsed_time = time.time() - start_time
         return segments_result, elapsed_time

                                                 best_of=params.best_of,
                                                 patience=params.patience,
                                                 temperature=params.temperature,
+                                                compression_ratio_threshold=params.compression_ratio_threshold,
+                                                progress_callback=progress_callback,)["segments"]
         elapsed_time = time.time() - start_time
         return segments_result, elapsed_time

modules/whisper_parameter.py CHANGED Viewed

@@ -17,6 +17,7 @@ class WhisperGradioComponents:
     condition_on_previous_text: gr.Checkbox
     initial_prompt: gr.Textbox
     temperature: gr.Slider
     """
     A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
     See more about Gradio pre-processing: https://www.gradio.app/docs/components
@@ -64,9 +65,13 @@ class WhisperGradioComponents:
         "prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
         to make it more likely to predict those words correctly.
-    temperature: Temperature for sampling. It can be a tuple of temperatures,
             which will be successively used upon failures according to either
             `compression_ratio_threshold` or `log_prob_threshold`.
     """
     def to_list(self) -> list:
@@ -95,6 +100,7 @@ class WhisperValues:
     condition_on_previous_text: bool
     initial_prompt: Optional[str]
     temperature: float
     """
     A data class to use Whisper parameters. Use "after" Gradio pre-processing.
     See more about Gradio pre-processing: https://www.gradio.app/docs/components

     condition_on_previous_text: gr.Checkbox
     initial_prompt: gr.Textbox
     temperature: gr.Slider
+    compression_ratio_threshold: gr.Number
     """
     A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
     See more about Gradio pre-processing: https://www.gradio.app/docs/components
         "prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
         to make it more likely to predict those words correctly.
+    temperature: gr.Slider
+            Temperature for sampling. It can be a tuple of temperatures,
             which will be successively used upon failures according to either
             `compression_ratio_threshold` or `log_prob_threshold`.
+    compression_ratio_threshold: gr.Number
+        If the gzip compression ratio is above this value, treat as failed
     """
     def to_list(self) -> list:
     condition_on_previous_text: bool
     initial_prompt: Optional[str]
     temperature: float
+    compression_ratio_threshold: float
     """
     A data class to use Whisper parameters. Use "after" Gradio pre-processing.
     See more about Gradio pre-processing: https://www.gradio.app/docs/components