Update app.py
app.py CHANGED
@@ -726,6 +726,7 @@ from string import punctuation
 import librosa
 from pathlib import Path
 import torchaudio
+import numpy as np
 
 # Check if the token is already set in the environment variables
 hf_token = os.getenv("HF_TOKEN")
@@ -1129,25 +1130,25 @@ pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=proce
 
 base_audio_drive = "/data/audio"
 
-# Integrate the transcriber function
-transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
-
-def transcribe(audio):
-    sr, y = audio
-    y = y.astype(np.float32)
-    y /= np.max(np.abs(y))
-    return transcriber({"sampling_rate": sr, "raw": y})["text"]  # type: ignore
-
 def transcribe_function(stream, new_chunk):
-    sr, y = new_chunk
+    try:
+        sr, y = new_chunk[0], new_chunk[1]
+    except TypeError:
+        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
+        return stream, "", None
+
     y = y.astype(np.float32) / np.max(np.abs(y))
+
     if stream is not None:
         stream = np.concatenate([stream, y])
     else:
         stream = y
+
     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
-
-
+
+    full_text = result.get("text","")
+
+    return stream, full_text, result
 
 def update_map_with_response(history):
     if not history:
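For reference, the rewritten transcribe_function follows the standard Gradio streaming-ASR recipe: keep the audio accumulated so far in a state value, append each incoming (sample_rate, array) chunk, and re-run the ASR pipeline over the whole buffer. Below is a minimal self-contained sketch of that recipe, not the app's actual code; whisper-base.en stands in for the app's pipe_asr model, and the function name is hypothetical.

    import numpy as np
    from transformers import pipeline

    # Stand-in for the app's `pipe_asr`; any transformers ASR checkpoint works.
    asr = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

    def transcribe_stream(stream, new_chunk):
        """Append a (sample_rate, ndarray) chunk to the buffer and re-transcribe it."""
        sr, y = new_chunk
        y = y.astype(np.float32)
        peak = np.max(np.abs(y))
        if peak > 0:  # skip normalization on silent chunks to avoid dividing by zero
            y /= peak
        stream = y if stream is None else np.concatenate([stream, y])
        text = asr({"sampling_rate": sr, "raw": stream})["text"]
        return stream, text

Because the whole buffer is re-transcribed on every chunk, per-chunk latency grows with recording length; transcribing only the new chunk or a sliding window is the usual mitigation for long sessions.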
@@ -1384,6 +1385,12 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
         transcribe_button_whisper = gr.Button("Transcribe with Whisper")
         transcribe_button_whisper.click(fn=transcribe_function_whisper, inputs=[audio_input_whisper], outputs=[chat_input], api_name="whisper_asr")
 
+        # Streaming ASR component
+        gr.Markdown("<h2>Streaming ASR</h2>")
+        stream_audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=True)
+        stream_transcription = gr.State(None)  # Initialize stream state
+        stream_audio_input.change(transcribe_function, inputs=[stream_transcription, stream_audio_input], outputs=[stream_transcription, chat_input])
+
     with gr.Column():
         weather_output = gr.HTML(value=fetch_local_weather())
         news_output = gr.HTML(value=fetch_local_news())
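One wiring detail worth flagging in this hunk: the change handler lists two outputs (stream_transcription, chat_input), while the new transcribe_function returns three values (stream, full_text, result), a mismatch Gradio rejects when the event fires. A minimal sketch of consistent two-output wiring follows; the names and the placeholder callback are hypothetical, and note that current Gradio also exposes a dedicated stream event for live microphone chunks in addition to change.

    import gradio as gr
    import numpy as np

    def transcribe_stream(stream, new_chunk):
        # Placeholder callback: buffer chunks and return exactly two values,
        # matching the two components listed in `outputs` below.
        sr, y = new_chunk
        y = y.astype(np.float32)
        stream = y if stream is None else np.concatenate([stream, y])
        return stream, f"buffered {len(stream) / sr:.1f}s of audio"  # swap in real ASR

    with gr.Blocks() as demo:
        chat_input = gr.Textbox(label="Transcription")
        stream_state = gr.State(None)  # holds the accumulated audio buffer
        mic = gr.Audio(sources=["microphone"], type="numpy", streaming=True)
        mic.stream(transcribe_stream, inputs=[stream_state, mic],
                   outputs=[stream_state, chat_input])

    demo.launch()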
@@ -1404,3 +1411,4 @@ demo.launch(share=True)
 
 
 
+