transiteration committed on
Commit
955603e
·
1 Parent(s): 68f2db7

Upload 5 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ stt_kz_quartznet15x5.nemo filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from model.model import transcribe
4
+
5
+
6
# Heading displayed at the top of the Gradio page.
title = "Automatic Speech Recognition Using NVIDIA NeMo for Kazakh Speech"

# Gradio expects one row per example, each row holding one value per input
# component; there is a single audio input, so every row is a one-item list.
_EXAMPLES_DIR = "examples"
example_list = [
    [f"{_EXAMPLES_DIR}/{file_name}"] for file_name in os.listdir(_EXAMPLES_DIR)
]

# Audio is handed to `transcribe` as a path on disk (type="filepath").
_audio_input = gr.Audio(source="microphone", type="filepath")

demo = gr.Interface(
    fn=transcribe,
    inputs=_audio_input,
    outputs="text",
    title=title,
    examples=example_list,
)

demo.launch()
examples/example_01.wav ADDED
Binary file (243 kB). View file
 
examples/example_2.wav ADDED
Binary file (241 kB). View file
 
stt_kz_quartznet15x5.nemo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5dd5ae4ebef4832e19ed80d70ecfd1979e8a1d2158474cc2a986f365b41501
3
+ size 76380160
transcribe.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ import nemo.collections.asr as nemo_asr
4
+
5
+
6
def converter(audio_file):
    """Convert an audio file to mono, 16 kHz WAV with ffmpeg.

    The output path is the input path with its final extension removed and
    ``converted_.wav`` appended, so the converted file lands next to the
    input. An existing file at that path is overwritten (``-y``).

    Args:
        audio_file: Path of the audio file to convert.

    Returns:
        The path of the converted file. The path is returned even if ffmpeg
        is missing or exits with an error; the failure is logged and the
        caller finds out when it tries to read the file.
    """
    import logging
    import subprocess

    log = logging.getLogger(__name__)

    # splitext strips only the last extension of the final path component,
    # so dots in directory names (or a leading "./") no longer truncate the
    # output path the way split(".")[0] did.
    stem, _ext = os.path.splitext(audio_file)
    converted = stem + "converted_.wav"

    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters from being split or interpreted as commands.
    cmd = ["ffmpeg", "-y", "-i", audio_file, "-ac", "1", "-ar", "16000", converted]
    try:
        result = subprocess.run(cmd, stdin=subprocess.DEVNULL, check=False)
    except OSError:
        # ffmpeg not installed / not executable: stay best-effort like the
        # previous os.system call, but leave a trace in the logs.
        log.warning("could not run ffmpeg for %r", audio_file, exc_info=True)
    else:
        if result.returncode != 0:
            log.warning(
                "ffmpeg exited with status %d for %r", result.returncode, audio_file
            )
    return converted
12
+
13
+
14
+
15
def transcribe(audio_file):
    """Transcribe an audio file with the module-level ``model_kz`` model.

    The file is first passed through ``converter`` and the converted file is
    then handed to ``model_kz.transcribe``.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The first element of the model's transcription result, or the string
        ``'Try another file format.'`` if conversion or transcription raised.
    """
    import logging

    try:
        # Conversion is inside the try so that unusable input (e.g. None when
        # nothing was recorded) yields the fallback message instead of an
        # unhandled error in the UI callback.
        wav_file = converter(audio_file)
        text = model_kz.transcribe([wav_file])
        return text[0]
    except Exception:
        # `Exception` rather than a bare `except` so KeyboardInterrupt and
        # SystemExit still propagate; log the real cause for debugging.
        logging.getLogger(__name__).exception(
            "transcription failed for %r", audio_file
        )
        return 'Try another file format.'
22
+
23
+
24
+
25
+
26
# Language code that is embedded in the checkpoint file name below.
language = "kz"

# Directory containing this file. strict=True makes resolve() raise if the
# path does not exist instead of returning an unverified path.
BASE_DIR = Path(__file__).resolve(strict=True).parent

# The checkpoint is restored once, at import time, and shared by every
# transcribe() call through the module-level `model_kz` name.
_checkpoint_path = f"{BASE_DIR}/stt_{language}_quartznet15x5.nemo"
model_kz = nemo_asr.models.EncDecCTCModel.restore_from(
    restore_path=_checkpoint_path
)
31
+