Spaces:

jhj0517
/

Whisper-WebUI

Running

App Files Files Community

jhj0517 commited on Jun 23, 2024

Commit

ec41bf5

1 Parent(s): 75962fd

add base inference

Browse files

Files changed (1) hide show

modules/insanely_fast_whisper_inference.py +97 -0

modules/insanely_fast_whisper_inference.py ADDED Viewed

	@@ -0,0 +1,97 @@

+import whisper
+import gradio as gr
+import time
+import os
+from typing import BinaryIO, Union, Tuple, List
+import numpy as np
+import torch
+from modules.whisper_base import WhisperBase
+from modules.whisper_parameter import *
+class InsanelyFastWhisperInference(WhisperBase):
+    def __init__(self):
+        super().__init__(
+            model_dir=os.path.join("models", "Whisper")
+        )
+    def transcribe(self,
+                   audio: Union[str, np.ndarray, torch.Tensor],
+                   progress: gr.Progress,
+                   *whisper_params,
+                   ) -> Tuple[List[dict], float]:
+        """
+        transcribe method for faster-whisper.
+        Parameters
+        ----------
+        audio: Union[str, BinaryIO, np.ndarray]
+            Audio path or file binary or Audio numpy array
+        progress: gr.Progress
+            Indicator to show progress directly in gradio.
+        *whisper_params: tuple
+            Gradio components related to Whisper. see whisper_data_class.py for details.
+        Returns
+        ----------
+        segments_result: List[dict]
+            list of dicts that includes start, end timestamps and transcribed text
+        elapsed_time: float
+            elapsed time for transcription
+        """
+        start_time = time.time()
+        params = WhisperValues(*whisper_params)
+        if params.model_size != self.current_model_size or self.model is None or self.current_compute_type != params.compute_type:
+            self.update_model(params.model_size, params.compute_type, progress)
+        if params.lang == "Automatic Detection":
+            params.lang = None
+        def progress_callback(progress_value):
+            progress(progress_value, desc="Transcribing..")
+        segments_result = self.model.transcribe(audio=audio,
+                                                language=params.lang,
+                                                verbose=False,
+                                                beam_size=params.beam_size,
+                                                logprob_threshold=params.log_prob_threshold,
+                                                no_speech_threshold=params.no_speech_threshold,
+                                                task="translate" if params.is_translate and self.current_model_size in self.translatable_models else "transcribe",
+                                                fp16=True if params.compute_type == "float16" else False,
+                                                best_of=params.best_of,
+                                                patience=params.patience,
+                                                temperature=params.temperature,
+                                                compression_ratio_threshold=params.compression_ratio_threshold,
+                                                progress_callback=progress_callback,)["segments"]
+        elapsed_time = time.time() - start_time
+        return segments_result, elapsed_time
+    def update_model(self,
+                     model_size: str,
+                     compute_type: str,
+                     progress: gr.Progress,
+                     ):
+        """
+        Update current model setting
+        Parameters
+        ----------
+        model_size: str
+            Size of whisper model
+        compute_type: str
+            Compute type for transcription.
+            see more info : https://opennmt.net/CTranslate2/quantization.html
+        progress: gr.Progress
+            Indicator to show progress directly in gradio.
+        """
+        progress(0, desc="Initializing Model..")
+        self.current_compute_type = compute_type
+        self.current_model_size = model_size
+        self.model = whisper.load_model(
+            name=model_size,
+            device=self.device,
+            download_root=self.model_dir
+        )