Spaces:
Running
Running
File size: 5,199 Bytes
63ab978 f9abd83 63ab978 cf9d62f f9abd83 63ab978 cf9d62f 63ab978 cf9d62f 63ab978 cf9d62f 63ab978 cf9d62f 63ab978 cf9d62f 63ab978 cf9d62f 63ab978 cf9d62f 63ab978 cf9d62f 63ab978 cf9d62f 63ab978 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import whisper
from modules.subtitle_manager import get_srt,get_vtt,write_srt,write_vtt,safe_filename
from modules.youtube_manager import get_ytdata,get_ytaudio
import gradio as gr
import os
from datetime import datetime
class WhisperInference():
    """Hold one loaded Whisper model and expose Gradio transcription handlers.

    Each public ``transcribe_*`` method switches the model if a different
    size is requested, transcribes (or translates) the audio, writes the
    subtitle under ``outputs/`` and returns a status message followed by the
    subtitle text.
    """

    # Directory (relative to the working directory) receiving subtitle files.
    _OUTPUT_DIR = "outputs"

    def __init__(self):
        """Load the default ``large-v2`` model and cache the UI choice lists."""
        print("\nInitializing Model..\n")
        self.current_model_size = "large-v2"
        self.model = whisper.load_model(self.current_model_size)
        self.available_models = whisper.available_models()
        self.available_langs = sorted(whisper.tokenizer.LANGUAGES.values())

    def transcribe_file(self, fileobj,
                        model_size, lang, subformat, istranslate,
                        progress=gr.Progress()):
        """Transcribe an uploaded file and write its subtitle.

        Args:
            fileobj: Upload object; ``fileobj.name`` is the path that is read
                and ``fileobj.orig_name`` supplies the output base name.
            model_size: Whisper model name to use.
            lang: Language name, or "Automatic Detection" to let Whisper decide.
            subformat: "SRT" or "WebVTT".
            istranslate: When truthy, run Whisper's "translate" task.
            progress: Gradio progress tracker.

        Returns:
            A status message followed by the subtitle text.

        Raises:
            ValueError: If ``subformat`` is not a supported format.
        """
        # Fail before the expensive model/audio work, not after it.
        self._subtitle_handlers(subformat)
        self._ensure_model(model_size, progress)

        progress(0, desc="Loading Audio..")
        audio = whisper.load_audio(fileobj.name)
        result = self._run_transcription(audio, lang, istranslate, progress)

        file_name, _ = os.path.splitext(os.path.basename(fileobj.orig_name))
        # NOTE(review): drops the last 9 characters of the stem -- presumably
        # a suffix added by the upload component; confirm. Stems shorter than
        # 9 characters become empty here.
        file_name = safe_filename(file_name[:-9])

        subtitle = self._write_subtitle(result["segments"], file_name, subformat)
        return f"Done! Subtitle is in the outputs folder.\n\n{subtitle}"

    def transcribe_youtube(self, youtubelink,
                           model_size, lang, subformat, istranslate,
                           progress=gr.Progress()):
        """Transcribe the audio of a YouTube video and write its subtitle.

        Args:
            youtubelink: Link passed to ``get_ytdata``.
            model_size: Whisper model name to use.
            lang: Language name, or "Automatic Detection" to let Whisper decide.
            subformat: "SRT" or "WebVTT".
            istranslate: When truthy, run Whisper's "translate" task.
            progress: Gradio progress tracker.

        Returns:
            A status message followed by the subtitle text.

        Raises:
            ValueError: If ``subformat`` is not a supported format.
        """
        self._subtitle_handlers(subformat)
        self._ensure_model(model_size, progress)

        progress(0, desc="Loading Audio from Youtube..")
        yt = get_ytdata(youtubelink)
        audio = whisper.load_audio(get_ytaudio(yt))
        result = self._run_transcription(audio, lang, istranslate, progress)

        file_name = safe_filename(yt.title)
        subtitle = self._write_subtitle(result["segments"], file_name, subformat)
        return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"

    def transcribe_mic(self, micaudio,
                       model_size, lang, subformat, istranslate,
                       progress=gr.Progress()):
        """Transcribe microphone input and write its subtitle.

        Args:
            micaudio: Audio handed straight to ``model.transcribe``.
            model_size: Whisper model name to use.
            lang: Language name, or "Automatic Detection" to let Whisper decide.
            subformat: "SRT" or "WebVTT".
            istranslate: When truthy, run Whisper's "translate" task.
            progress: Gradio progress tracker.

        Returns:
            A status message followed by the subtitle text.

        Raises:
            ValueError: If ``subformat`` is not a supported format.
        """
        self._subtitle_handlers(subformat)
        self._ensure_model(model_size, progress)

        progress(0, desc="Loading Audio..")
        result = self._run_transcription(micaudio, lang, istranslate, progress)

        subtitle = self._write_subtitle(result["segments"], "Mic", subformat)
        return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"

    def _ensure_model(self, model_size, progress):
        """Load ``model_size`` if it differs from the currently loaded model."""
        if model_size == self.current_model_size:
            return
        progress(0, desc="Initializing Model..")
        # Load first, record the size second: if loading raises, the old
        # model and its size stay consistent with each other.
        self.model = whisper.load_model(model_size)
        self.current_model_size = model_size

    def _run_transcription(self, audio, lang, istranslate, progress):
        """Run the loaded model on ``audio`` and return Whisper's result dict."""
        def progress_callback(progress_value):
            progress(progress_value, desc="Transcribing..")

        if lang == "Automatic Detection":
            lang = None  # None lets Whisper detect the language itself.

        # NOTE(review): ``progress_callback`` is forwarded exactly as the
        # original code did; confirm the installed whisper build accepts it.
        options = {
            "audio": audio,
            "language": lang,
            "verbose": False,
            "progress_callback": progress_callback,
        }
        if istranslate:
            options["task"] = "translate"

        result = self.model.transcribe(**options)
        progress(1, desc="Completed!")
        return result

    @staticmethod
    def _subtitle_handlers(subformat):
        """Return ``(build, write, extension)`` for ``subformat`` or raise."""
        if subformat == "SRT":
            return get_srt, write_srt, "srt"
        if subformat == "WebVTT":
            return get_vtt, write_vtt, "vtt"
        raise ValueError(
            f"Unsupported subtitle format: {subformat!r} "
            "(expected 'SRT' or 'WebVTT')"
        )

    def _write_subtitle(self, segments, file_name, subformat):
        """Build the subtitle, save it under ``outputs/`` and return its text."""
        build, write, extension = self._subtitle_handlers(subformat)
        subtitle = build(segments)

        # Create the folder on demand so a fresh checkout does not fail here.
        os.makedirs(self._OUTPUT_DIR, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        output_path = f"{self._OUTPUT_DIR}/{file_name}-{timestamp}"
        write(subtitle, f"{output_path}.{extension}")
        return subtitle
|