import gradio as gr
from transformers import Wav2Vec2Processor
from transformers import AutoModelForCTC
from conversationalnlp.models.wav2vec2 import Wav2Vec2Predict
from conversationalnlp.models.wav2vec2 import ModelLoader
from conversationalnlp.utils import *
import soundfile as sf
import os
"""
run gradio with
>>python app.py
"""
audiosavepath = os.getcwd()
pretrained_model = "codenamewei/speech-to-text"
processor = Wav2Vec2Processor.from_pretrained(pretrained_model)
model = AutoModelForCTC.from_pretrained(pretrained_model)
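
# Wrap the model and processor behind the file-based prediction helper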
modelloader = ModelLoader(model, processor)
predictor = Wav2Vec2Predict(modelloader)
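
# Audio clips surfaced as clickable examples in the Gradio UI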
examples = ["example1.flac", "example2.flac", "example3.flac"]


def greet(audioarray):
    """
    Transcribe the audio captured by the Gradio audio component.

    audioarray is a tuple of (sample_rate, data), for example:
    (16000, array([ -5277184, 326400, -120320, ..., -5970432, -12745216,
            -6934528], dtype=int32))
    """
    audioabspath = os.path.join(audiosavepath, "temp.wav")

    # WORKAROUND: save to file and reread to get the array shape needed for prediction
    sf.write(audioabspath, audioarray[1], audioarray[0])
    print(f"Audio at path {audioabspath}")

    predictiontexts = predictor.predictfiles([audioabspath])

    # Return the predicted text followed by the corrected text on a new line
    outputtext = predictiontexts["predicted_text"][-1] + \
        "\n" + predictiontexts["corrected_text"][-1]

    return outputtext


demo = gr.Interface(fn=greet,
                    inputs="audio",
                    outputs="text",
                    title="Speech-to-Text",
                    examples=examples)

demo.launch()  # pass share=True to expose a public link
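
# Hypothetical local smoke test for greet() (assumes example1.flac sits next to app.py):
#   data, rate = sf.read("example1.flac")
#   print(greet((rate, data)))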