import gradio as gr
from transformers import Wav2Vec2Processor
from transformers import AutoModelForCTC
from conversationalnlp.models.wav2vec2 import Wav2Vec2Predict
from conversationalnlp.models.wav2vec2 import ModelLoader
from conversationalnlp.utils import *
import soundfile as sf
import os
""" | |
run gradio with | |
>>python app.py | |
""" | |
# Directory where the temporary audio file is written before prediction
audiosavepath = os.getcwd()

pretrained_model = "codenamewei/speech-to-text"

# Load the fine-tuned wav2vec 2.0 processor and CTC model from the Hugging Face Hub
processor = Wav2Vec2Processor.from_pretrained(pretrained_model)
model = AutoModelForCTC.from_pretrained(pretrained_model)

modelloader = ModelLoader(model, processor)
predictor = Wav2Vec2Predict(modelloader)

# Example audio files shown in the Gradio interface
examples = ["example1.flac", "example2.flac", "example3.flac"]
def greet(audioarray):
    """
    Transcribe audio recorded or uploaded through the Gradio interface.

    `audioarray` is a (sample_rate, data) tuple, e.g.
    (16000, array([ -5277184, 326400, -120320, ..., -5970432, -12745216,
           -6934528], dtype=int32))
    """
    audioabspath = os.path.join(audiosavepath, "temp.wav")

    # WORKAROUND: save to file and reread to get the array shape needed for prediction
    sf.write(audioabspath, audioarray[1], audioarray[0])

    print(f"Audio at path {audioabspath}")
    predictiontexts = predictor.predictfiles([audioabspath])

    # Return the predicted text and the corrected text on separate lines
    outputtext = predictiontexts["predicted_text"][-1] + \
        "\n" + predictiontexts["corrected_text"][-1]

    return outputtext
demo = gr.Interface(fn=greet,
                    inputs="audio",
                    outputs="text",
                    title="Speech-to-Text",
                    examples=examples)

demo.launch()  # pass share=True to expose a public link
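
# A minimal sketch of a local smoke test (assumption: example1.flac sits next to
# app.py; this block is not part of the original Space). It reads an example file
# with soundfile and feeds greet() the same (sample_rate, data) tuple Gradio passes:
#
#     data, samplerate = sf.read("example1.flac", dtype="int32")
#     print(greet((samplerate, data)))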