Update app.py
app.py CHANGED
@@ -726,6 +726,7 @@ from string import punctuation
 import librosa
 from pathlib import Path
 import torchaudio
+import numpy as np
 
 # Check if the token is already set in the environment variables
 hf_token = os.getenv("HF_TOKEN")
@@ -1129,25 +1130,25 @@ pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=proce
 
 base_audio_drive = "/data/audio"
 
-# Integrate the transcriber function
-transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
-
-def transcribe(audio):
-    sr, y = audio
-    y = y.astype(np.float32)
-    y /= np.max(np.abs(y))
-    return transcriber({"sampling_rate": sr, "raw": y})["text"]  # type: ignore
-
 def transcribe_function(stream, new_chunk):
-    sr, y = new_chunk
+    try:
+        sr, y = new_chunk[0], new_chunk[1]
+    except TypeError:
+        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
+        return stream, "", None
+
     y = y.astype(np.float32) / np.max(np.abs(y))
+
     if stream is not None:
         stream = np.concatenate([stream, y])
     else:
         stream = y
+
     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
-
-
+
+    full_text = result.get("text","")
+
+    return stream, full_text, result
 
 def update_map_with_response(history):
     if not history:
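For reference, the rewritten transcribe_function follows the standard Gradio streaming-ASR recipe: keep the audio accumulated so far in a state value, append each incoming (sample_rate, array) chunk, and re-run the ASR pipeline over the whole buffer. Below is a minimal self-contained sketch of that recipe, not the app's actual code; whisper-base.en stands in for the app's pipe_asr model, and the function name is hypothetical.

    import numpy as np
    from transformers import pipeline

    # Stand-in for the app's `pipe_asr`; any transformers ASR checkpoint works.
    asr = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

    def transcribe_stream(stream, new_chunk):
        """Append a (sample_rate, ndarray) chunk to the buffer and re-transcribe it."""
        sr, y = new_chunk
        y = y.astype(np.float32)
        peak = np.max(np.abs(y))
        if peak > 0:  # skip normalization on silent chunks to avoid dividing by zero
            y /= peak
        stream = y if stream is None else np.concatenate([stream, y])
        text = asr({"sampling_rate": sr, "raw": stream})["text"]
        return stream, text

Because the whole buffer is re-transcribed on every chunk, per-chunk latency grows with recording length; transcribing only the new chunk or a sliding window is the usual mitigation for long sessions.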
@@ -1384,6 +1385,12 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
         transcribe_button_whisper = gr.Button("Transcribe with Whisper")
         transcribe_button_whisper.click(fn=transcribe_function_whisper, inputs=[audio_input_whisper], outputs=[chat_input], api_name="whisper_asr")
 
+        # Streaming ASR component
+        gr.Markdown("<h2>Streaming ASR</h2>")
+        stream_audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=True)
+        stream_transcription = gr.State(None)  # Initialize stream state
+        stream_audio_input.change(transcribe_function, inputs=[stream_transcription, stream_audio_input], outputs=[stream_transcription, chat_input])
+
     with gr.Column():
         weather_output = gr.HTML(value=fetch_local_weather())
         news_output = gr.HTML(value=fetch_local_news())
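One wiring detail worth flagging in this hunk: the change handler lists two outputs (stream_transcription, chat_input), while the new transcribe_function returns three values (stream, full_text, result), a mismatch Gradio rejects when the event fires. A minimal sketch of consistent two-output wiring follows; the names and the placeholder callback are hypothetical, and note that current Gradio also exposes a dedicated stream event for live microphone chunks in addition to change.

    import gradio as gr
    import numpy as np

    def transcribe_stream(stream, new_chunk):
        # Placeholder callback: buffer chunks and return exactly two values,
        # matching the two components listed in `outputs` below.
        sr, y = new_chunk
        y = y.astype(np.float32)
        stream = y if stream is None else np.concatenate([stream, y])
        return stream, f"buffered {len(stream) / sr:.1f}s of audio"  # swap in real ASR

    with gr.Blocks() as demo:
        chat_input = gr.Textbox(label="Transcription")
        stream_state = gr.State(None)  # holds the accumulated audio buffer
        mic = gr.Audio(sources=["microphone"], type="numpy", streaming=True)
        mic.stream(transcribe_stream, inputs=[stream_state, mic],
                   outputs=[stream_state, chat_input])

    demo.launch()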
@@ -1404,3 +1411,4 @@ demo.launch(share=True)
 
 
 
+