Pijush2023 committed
Commit 986787e · verified · 1 Parent(s): e955af0

Update app.py

Files changed (1): app.py (+20 -12)
app.py CHANGED
@@ -726,6 +726,7 @@ from string import punctuation
 import librosa
 from pathlib import Path
 import torchaudio
+import numpy as np
 
 # Check if the token is already set in the environment variables
 hf_token = os.getenv("HF_TOKEN")
@@ -1129,25 +1130,25 @@ pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=proce
 
 base_audio_drive = "/data/audio"
 
-# Integrate the transcriber function
-transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
-
-def transcribe(audio):
-    sr, y = audio
-    y = y.astype(np.float32)
-    y /= np.max(np.abs(y))
-    return transcriber({"sampling_rate": sr, "raw": y})["text"]  # type: ignore
-
 def transcribe_function(stream, new_chunk):
-    sr, y = new_chunk[0], new_chunk[1]
+    try:
+        sr, y = new_chunk[0], new_chunk[1]
+    except TypeError:
+        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
+        return stream, "", None
+
     y = y.astype(np.float32) / np.max(np.abs(y))
+
     if stream is not None:
         stream = np.concatenate([stream, y])
     else:
         stream = y
+
     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
-    full_text = result.get("text", "")
-    return stream, full_text  # Return the transcribed text
+
+    full_text = result.get("text","")
+
+    return stream, full_text, result
 
 def update_map_with_response(history):
     if not history:
@@ -1384,6 +1385,12 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
         transcribe_button_whisper = gr.Button("Transcribe with Whisper")
         transcribe_button_whisper.click(fn=transcribe_function_whisper, inputs=[audio_input_whisper], outputs=[chat_input], api_name="whisper_asr")
 
+        # Streaming ASR component
+        gr.Markdown("<h2>Streaming ASR</h2>")
+        stream_audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=True)
+        stream_transcription = gr.State(None)  # Initialize stream state
+        stream_audio_input.change(transcribe_function, inputs=[stream_transcription, stream_audio_input], outputs=[stream_transcription, chat_input])
+
     with gr.Column():
         weather_output = gr.HTML(value=fetch_local_weather())
         news_output = gr.HTML(value=fetch_local_news())
@@ -1404,3 +1411,4 @@ demo.launch(share=True)
 
 
 
+
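
For reference, below is a minimal, self-contained sketch of the chunk-accumulation pattern the updated transcribe_function uses. fake_pipe_asr is a hypothetical stand-in for the real `pipe_asr` transformers pipeline defined earlier in app.py; everything else mirrors the committed code.

import numpy as np

def fake_pipe_asr(inputs, return_timestamps=False):
    # Hypothetical stand-in for pipe_asr: reports how much audio has accumulated.
    seconds = len(inputs["array"]) / inputs["sampling_rate"]
    return {"text": f"<{seconds:.1f}s of audio transcribed>"}

def transcribe_function(stream, new_chunk):
    try:
        sr, y = new_chunk[0], new_chunk[1]
    except TypeError:
        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
        return stream, "", None

    # Normalize the incoming chunk and append it to the running buffer,
    # so each call re-transcribes the whole utterance captured so far.
    y = y.astype(np.float32) / np.max(np.abs(y))
    stream = y if stream is None else np.concatenate([stream, y])

    result = fake_pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    return stream, result.get("text", ""), result

# Gradio's streaming Audio component delivers (sample_rate, ndarray) tuples;
# feed two half-second chunks of noise to exercise the accumulation.
state = None
for _ in range(2):
    state, text, _ = transcribe_function(state, (16000, np.random.randn(8000)))
    print(text)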
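One wiring detail worth noting: the committed handler now returns three values (stream, full_text, result) while the .change call lists only two output components, which Gradio may flag as a return-value mismatch at runtime. A hypothetical harness that reconciles the two, reusing the sketch above; the raw_output component is an assumption, not part of the commit.

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("<h2>Streaming ASR</h2>")
    chat_input = gr.Textbox(label="Transcription")
    raw_output = gr.JSON(label="Raw ASR result")  # hypothetical third output
    stream_transcription = gr.State(None)  # running audio buffer
    stream_audio_input = gr.Audio(sources=["microphone"], type="numpy", streaming=True)
    stream_audio_input.change(
        transcribe_function,
        inputs=[stream_transcription, stream_audio_input],
        outputs=[stream_transcription, chat_input, raw_output],
    )

demo.launch()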