Spaces: Runtime error
Commit 955603e
Parent(s): 68f2db7
Upload 5 files
Files changed:
- .gitattributes +1 -0
- app.py +16 -0
- examples/example_01.wav +0 -0
- examples/example_2.wav +0 -0
- stt_kz_quartznet15x5.nemo +3 -0
- transcribe.py +31 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+stt_kz_quartznet15x5.nemo filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,16 @@
+import os
+import gradio as gr
+from model.model import transcribe
+
+
+title = "Automatic Speech Recognition Using NVIDIA NeMo for Kazakh Speech"
+example_list = [["examples/" + example] for example in os.listdir("examples")]
+
+demo = gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(source="microphone", type="filepath"),
+    outputs="text",
+    title=title,
+    examples=example_list)
+
+demo.launch()
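Note: app.py imports transcribe from model.model, but this commit adds the helper as transcribe.py at the repository root, so the import would raise ModuleNotFoundError at startup, which is consistent with the Space's "Runtime error" status. A minimal sketch of the adjusted import, assuming the transcribe.py uploaded in this same commit is the intended source (a hypothetical fix, not part of the commit):

# Hypothetical correction: point the import at the transcribe.py added in this
# commit instead of a model/ package that is not present in the file list above.
from transcribe import transcribe

If the Space runs Gradio 4.x or newer, gr.Audio(source="microphone", ...) may also need to become sources=["microphone"], since that keyword was renamed; the committed code assumes the older 3.x API.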
examples/example_01.wav ADDED
Binary file (243 kB)
examples/example_2.wav ADDED
Binary file (241 kB)
stt_kz_quartznet15x5.nemo ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f5dd5ae4ebef4832e19ed80d70ecfd1979e8a1d2158474cc2a986f365b41501
+size 76380160
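The three added lines are a Git LFS pointer, not the checkpoint itself; the ~76 MB .nemo archive is stored in LFS, which is why .gitattributes is extended above. A small sketch, assuming the checkpoint has been pulled locally to the repo-root path used in this commit, to confirm the file matches the pointer's oid and size:

import hashlib
from pathlib import Path

ckpt = Path("stt_kz_quartznet15x5.nemo")

# `size` field of the LFS pointer above.
assert ckpt.stat().st_size == 76380160, "size mismatch: LFS object probably not pulled"

# `oid sha256:` field of the LFS pointer above.
digest = hashlib.sha256(ckpt.read_bytes()).hexdigest()
assert digest == "2f5dd5ae4ebef4832e19ed80d70ecfd1979e8a1d2158474cc2a986f365b41501"
print("checkpoint matches the LFS pointer")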
transcribe.py ADDED
@@ -0,0 +1,31 @@
+import os
+from pathlib import Path
+import nemo.collections.asr as nemo_asr
+
+
+def converter(audio_file):
+    converted = audio_file.split(".")[0] + "converted_.wav"
+    cmd_str = f"ffmpeg -y -i {audio_file} -ac 1 -ar 16000 {converted}"
+    os.system(cmd_str)
+    # os.remove(audio_file)
+    return converted
+
+
+
+def transcribe(audio_file):
+    wav_file = converter(audio_file)
+    try:
+        text = model_kz.transcribe([wav_file])
+        return text[0]
+    except:
+        return 'Try another file format.'
+
+
+
+
+language = "kz"
+
+BASE_DIR = Path(__file__).resolve(strict=True).parent
+
+model_kz = nemo_asr.models.EncDecCTCModel.restore_from(restore_path=f"{BASE_DIR}/stt_{language}_quartznet15x5.nemo")
+
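Two fragile spots in transcribe.py are worth noting: converter() derives the output name with audio_file.split(".")[0], which breaks if any parent directory contains a dot, and the ffmpeg command goes through os.system as an unquoted string, so temp paths with spaces can fail without raising; transcribe() then hides any such failure behind a bare except. A hardened sketch of the same conversion step, assuming ffmpeg is on PATH as the committed code already does (an alternative, not the author's implementation):

import subprocess
from pathlib import Path


def converter(audio_file):
    """Resample whatever Gradio hands us to 16 kHz mono WAV for QuartzNet."""
    src = Path(audio_file)
    converted = src.with_name(src.stem + "_converted.wav")
    # A list of arguments avoids shell quoting issues with spaces in temp paths,
    # and check=True surfaces conversion errors instead of swallowing them.
    subprocess.run(
        ["ffmpeg", "-y", "-i", str(src), "-ac", "1", "-ar", "16000", str(converted)],
        check=True,
        capture_output=True,
    )
    return str(converted)

With this in place, transcribe() could catch subprocess.CalledProcessError specifically rather than using a bare except, so genuine model errors are not reported as a file-format problem.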