Spaces:

jhj0517
/

Whisper-WebUI

Running

App Files Files Community

Damho Lee committed on May 31, 2023

Commit

6074f61

1 Parent(s): 9f11092

Remove input file(s) when finished

Browse files

Files changed (1) hide show

modules/whisper_Inference.py +109 -88

modules/whisper_Inference.py CHANGED Viewed

@@ -22,20 +22,80 @@ class WhisperInference:
         def progress_callback(progress_value):
             progress(progress_value, desc="Transcribing..")
-        if model_size != self.current_model_size or self.model is None:
-            progress(0, desc="Initializing Model..")
-            self.current_model_size = model_size
-            self.model = whisper.load_model(name=model_size, download_root="models/Whisper")
-        if lang == "Automatic Detection":
-            lang = None
-        progress(0, desc="Loading Audio..")
-        files_info = {}
-        for fileobj in fileobjs:
-            audio = whisper.load_audio(fileobj.name)
             translatable_model = ["large", "large-v1", "large-v2"]
             if istranslate and self.current_model_size in translatable_model:
@@ -47,9 +107,7 @@ class WhisperInference:
             progress(1, desc="Completed!")
-            file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
-            file_name = file_name[:-9]
-            file_name = safe_filename(file_name)
             timestamp = datetime.now().strftime("%m%d%H%M%S")
             output_path = f"outputs/{file_name}-{timestamp}"
@@ -60,57 +118,14 @@ class WhisperInference:
                 subtitle = get_vtt(result["segments"])
                 write_file(subtitle, f"{output_path}.vtt")
-            files_info[file_name] = subtitle
-        total_result = ''
-        for file_name, subtitle in files_info.items():
-            total_result += '------------------------------------\n'
-            total_result += f'{file_name}\n\n'
-            total_result += f'{subtitle}'
-        return f"Done! Subtitle is in the outputs folder.\n\n{total_result}"
-    def transcribe_youtube(self, youtubelink,
-                           model_size, lang, subformat, istranslate,
-                           progress=gr.Progress()):
-        def progress_callback(progress_value):
-            progress(progress_value, desc="Transcribing..")
-        if model_size != self.current_model_size or self.model is None:
-            progress(0, desc="Initializing Model..")
-            self.current_model_size = model_size
-            self.model = whisper.load_model(name=model_size, download_root="models/Whisper")
-        if lang == "Automatic Detection":
-            lang = None
-        progress(0, desc="Loading Audio from Youtube..")
-        yt = get_ytdata(youtubelink)
-        audio = whisper.load_audio(get_ytaudio(yt))
-        translatable_model = ["large", "large-v1", "large-v2"]
-        if istranslate and self.current_model_size in translatable_model:
-            result = self.model.transcribe(audio=audio, language=lang, verbose=False, task="translate",
-                                           progress_callback=progress_callback)
-        else:
-            result = self.model.transcribe(audio=audio, language=lang, verbose=False,
-                                           progress_callback=progress_callback)
-        progress(1, desc="Completed!")
-        file_name = safe_filename(yt.title)
-        timestamp = datetime.now().strftime("%m%d%H%M%S")
-        output_path = f"outputs/{file_name}-{timestamp}"
-        if subformat == "SRT":
-            subtitle = get_srt(result["segments"])
-            write_file(subtitle, f"{output_path}.srt")
-        elif subformat == "WebVTT":
-            subtitle = get_vtt(result["segments"])
-            write_file(subtitle, f"{output_path}.vtt")
-        return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"
     def transcribe_mic(self, micaudio,
                        model_size, lang, subformat, istranslate,
@@ -119,34 +134,40 @@ class WhisperInference:
         def progress_callback(progress_value):
             progress(progress_value, desc="Transcribing..")
-        if model_size != self.current_model_size or self.model is None:
-            progress(0, desc="Initializing Model..")
-            self.current_model_size = model_size
-            self.model = whisper.load_model(name=model_size, download_root="models/Whisper")
-        if lang == "Automatic Detection":
-            lang = None
-        progress(0, desc="Loading Audio..")
-        translatable_model = ["large", "large-v1", "large-v2"]
-        if istranslate and self.current_model_size in translatable_model:
-            result = self.model.transcribe(audio=micaudio, language=lang, verbose=False, task="translate",
-                                           progress_callback=progress_callback)
-        else:
-            result = self.model.transcribe(audio=micaudio, language=lang, verbose=False,
-                                           progress_callback=progress_callback)
-        progress(1, desc="Completed!")
-        timestamp = datetime.now().strftime("%m%d%H%M%S")
-        output_path = f"outputs/Mic-{timestamp}"
-        if subformat == "SRT":
-            subtitle = get_srt(result["segments"])
-            write_file(subtitle, f"{output_path}.srt")
-        elif subformat == "WebVTT":
-            subtitle = get_vtt(result["segments"])
-            write_file(subtitle, f"{output_path}.vtt")
-        return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"

         def progress_callback(progress_value):
             progress(progress_value, desc="Transcribing..")
+        try:
+            if model_size != self.current_model_size or self.model is None:
+                progress(0, desc="Initializing Model..")
+                self.current_model_size = model_size
+                self.model = whisper.load_model(name=model_size, download_root="models/Whisper")
+            if lang == "Automatic Detection":
+                lang = None
+            progress(0, desc="Loading Audio..")
+            files_info = {}
+            for fileobj in fileobjs:
+                audio = whisper.load_audio(fileobj.name)
+                translatable_model = ["large", "large-v1", "large-v2"]
+                if istranslate and self.current_model_size in translatable_model:
+                    result = self.model.transcribe(audio=audio, language=lang, verbose=False, task="translate",
+                                                   progress_callback=progress_callback)
+                else:
+                    result = self.model.transcribe(audio=audio, language=lang, verbose=False,
+                                                   progress_callback=progress_callback)
+                progress(1, desc="Completed!")
+                file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
+                file_name = file_name[:-9]
+                file_name = safe_filename(file_name)
+                timestamp = datetime.now().strftime("%m%d%H%M%S")
+                output_path = f"outputs/{file_name}-{timestamp}"
+                if subformat == "SRT":
+                    subtitle = get_srt(result["segments"])
+                    write_file(subtitle, f"{output_path}.srt")
+                elif subformat == "WebVTT":
+                    subtitle = get_vtt(result["segments"])
+                    write_file(subtitle, f"{output_path}.vtt")
+                files_info[file_name] = subtitle
+            total_result = ''
+            for file_name, subtitle in files_info.items():
+                total_result += '------------------------------------\n'
+                total_result += f'{file_name}\n\n'
+                total_result += f'{subtitle}'
+            return f"Done! Subtitle is in the outputs folder.\n\n{total_result}"
+        except Exception as e:
+            return str(e)
+        finally:
+            for fileobj in fileobjs:
+                if os.path.exists(fileobj.name):
+                    os.remove(fileobj.name)
+    def transcribe_youtube(self, youtubelink,
+                           model_size, lang, subformat, istranslate,
+                           progress=gr.Progress()):
+        def progress_callback(progress_value):
+            progress(progress_value, desc="Transcribing..")
+        try:
+            if model_size != self.current_model_size or self.model is None:
+                progress(0, desc="Initializing Model..")
+                self.current_model_size = model_size
+                self.model = whisper.load_model(name=model_size, download_root="models/Whisper")
+            if lang == "Automatic Detection":
+                lang = None
+            progress(0, desc="Loading Audio from Youtube..")
+            yt = get_ytdata(youtubelink)
+            audio = whisper.load_audio(get_ytaudio(yt))
             translatable_model = ["large", "large-v1", "large-v2"]
             if istranslate and self.current_model_size in translatable_model:
             progress(1, desc="Completed!")
+            file_name = safe_filename(yt.title)
             timestamp = datetime.now().strftime("%m%d%H%M%S")
             output_path = f"outputs/{file_name}-{timestamp}"
                 subtitle = get_vtt(result["segments"])
                 write_file(subtitle, f"{output_path}.vtt")
+            return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"
+        except Exception as e:
+            return str(e)
+        finally:
+            yt = get_ytdata(youtubelink)
+            file_path = get_ytaudio(yt)
+            if os.path.exists(file_path):
+                os.remove(file_path)
     def transcribe_mic(self, micaudio,
                        model_size, lang, subformat, istranslate,
         def progress_callback(progress_value):
             progress(progress_value, desc="Transcribing..")
+        try:
+            if model_size != self.current_model_size or self.model is None:
+                progress(0, desc="Initializing Model..")
+                self.current_model_size = model_size
+                self.model = whisper.load_model(name=model_size, download_root="models/Whisper")
+            if lang == "Automatic Detection":
+                lang = None
+            progress(0, desc="Loading Audio..")
+            translatable_model = ["large", "large-v1", "large-v2"]
+            if istranslate and self.current_model_size in translatable_model:
+                result = self.model.transcribe(audio=micaudio, language=lang, verbose=False, task="translate",
+                                               progress_callback=progress_callback)
+            else:
+                result = self.model.transcribe(audio=micaudio, language=lang, verbose=False,
+                                               progress_callback=progress_callback)
+            progress(1, desc="Completed!")
+            timestamp = datetime.now().strftime("%m%d%H%M%S")
+            output_path = f"outputs/Mic-{timestamp}"
+            if subformat == "SRT":
+                subtitle = get_srt(result["segments"])
+                write_file(subtitle, f"{output_path}.srt")
+            elif subformat == "WebVTT":
+                subtitle = get_vtt(result["segments"])
+                write_file(subtitle, f"{output_path}.vtt")
+            return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"
+        except Exception as e:
+            print(str(e))
+        finally:
+            if os.path.exists(micaudio):
+                os.remove(micaudio)