Spaces:

Mohssinibra
/

STT_Darija_v2

Running

Mohssinibra committed 10 days ago

Commit

c174689

verified ·

1 Parent(s): 002b689

wav2vect

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,24 +1,32 @@
 import gradio as gr
-from speechbrain.inference.ASR import EncoderASR
-# Load the pre-trained ASR model for Darija
-asr_model = EncoderASR.from_hparams(
-    source="speechbrain/asr-wav2vec2-dvoice-darija",
-    savedir="pretrained_models/asr-wav2vec2-dvoice-darija"
-)
 # Function to process the audio file and return transcription
 def transcribe_audio(audio_file):
-    # Transcribe the uploaded audio file
-    transcription = asr_model.transcribe_file(audio_file)
-    return transcription
 # Create a Gradio interface
 interface = gr.Interface(
-    fn=transcribe_audio,                 # Function to call
-    inputs=gr.Audio(type="filepath"),  # Input component (audio file upload)
-    outputs="text",                      # Output component (text)
-    title="Darija ASR Transcription",    # Title of the interface
     description="Upload an audio file in Darija, and the ASR model will transcribe it into text."  # Description
 )

 import gradio as gr
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import torch
+# Load the feature processor and the CTC model from the same Hub checkpoint.
+# Both are loaded once at import time and shared by every transcription call.
+# NOTE(review): the checkpoint ID names a generic Arabic XLSR model, not a
+# Darija-specific one — confirm the ID exists on the Hub and suits Darija.
+processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-xlsr-53-arabic")
+model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-xlsr-53-arabic")
 # Function to process the audio file and return transcription
 def transcribe_audio(audio_file):
+    """Transcribe an audio file to text with the module-level Wav2Vec2 CTC model.
+
+    Args:
+        audio_file: Path of the audio file to transcribe, or ``None``
+            (presumably what Gradio passes when no audio is supplied).
+
+    Returns:
+        The greedy-decoded transcription of the first (only) item in the
+        batch, or an empty string when no audio was provided.
+    """
+    # torchaudio was called without ever being imported, so every request
+    # raised NameError; import it here so the function is self-contained.
+    import torchaudio
+
+    if audio_file is None:
+        return ""
+
+    # torchaudio.load returns (waveform, sample_rate); by default the
+    # waveform is laid out as (channels, frames).
+    waveform, sample_rate = torchaudio.load(audio_file)
+    # Down-mix to a single 1-D mono signal: the processor expects one 1-D
+    # array per utterance, not a (channels, frames) matrix.
+    waveform = waveform.mean(dim=0)
+    # Resample to the rate the feature extractor is configured for; uploaded
+    # files can arrive at any sample rate.
+    target_rate = processor.feature_extractor.sampling_rate
+    if sample_rate != target_rate:
+        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)
+    input_values = processor(
+        waveform.numpy(), sampling_rate=target_rate, return_tensors="pt"
+    ).input_values
+    # Inference only: skip gradient tracking.
+    with torch.no_grad():
+        logits = model(input_values).logits
+    # Greedy CTC decoding: pick the most likely token per frame, then let the
+    # processor collapse the ids into text.
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.batch_decode(predicted_ids)
+    return transcription[0]
 # Create a Gradio interface: one audio input, handed to the callback as a
 # file path, and one plain-text output for the transcription.
 # NOTE(review): no interface.launch() call is visible in this view — confirm
 # the app is launched after this block.
 interface = gr.Interface(
+    fn=transcribe_audio,                # Function to call
+    inputs=gr.Audio(type="filepath"),   # Input component (audio file upload)
+    outputs="text",                     # Output component (text)
+    title="Darija ASR Transcription",   # Title of the interface
     description="Upload an audio file in Darija, and the ASR model will transcribe it into text."  # Description
 )