from transformers import pipeline
import gradio as gr
import time

# Load the Russian Whisper large-v3-turbo checkpoint as an
# automatic-speech-recognition pipeline running on the CPU.
pipe = pipeline(
    model='dvislobokov/whisper-large-v3-turbo-russian',
    tokenizer='dvislobokov/whisper-large-v3-turbo-russian',
    task='automatic-speech-recognition',
    device='cpu'
)

def transcribe(audio):
    # Transcribe the audio file and report how long inference took.
    start = time.time()
    text = pipe(audio, return_timestamps=True)['text']
    spent_time = time.time() - start
    return f'Spent time: {spent_time}\nText: {text}'

# Minimal Gradio UI: record from the microphone or upload a file,
# then display the transcription as plain text.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=['microphone', 'upload'], type='filepath'),
    outputs='text'
)

iface.launch(share=True)