matt HOFFNER committed · Commit 20635bb · 1 parent: a74fc7e

browser testing
Files changed:
- app/hooks/useSpeechRecognition.ts  +0 −29
- app/input.tsx                      +36 −12
- app/progress.tsx                   +19 −0
app/hooks/useSpeechRecognition.ts  DELETED

@@ -1,29 +0,0 @@
-"use client";
-
-import { useState, useEffect, useCallback } from "react";
-import { useTranscriber } from "./useTranscriber";
-
-const useSpeechRecognition = () => {
-  const [recognizedText, setRecognizedText] = useState('');
-  const transcriber = useTranscriber();
-
-  const startListening = useCallback((audioData: any) => {
-    if (!transcriber.isBusy && !transcriber.isModelLoading) {
-      transcriber.start(audioData);
-    }
-  }, [transcriber]);
-
-  const stopListening = useCallback(() => {
-    console.log("Stopped listening...", recognizedText);
-  }, [recognizedText]); // Updated dependency array
-
-  useEffect(() => {
-    if (transcriber.output && !transcriber.isBusy) {
-      setRecognizedText(transcriber.output.text);
-    }
-  }, [transcriber.output, transcriber.isBusy]);
-
-  return { startListening, stopListening, recognizedText };
-};
-
-export default useSpeechRecognition;
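For context: before this commit, app/input.tsx pulled this state from the hook rather than owning it. A minimal sketch of a consumer, assuming nothing beyond the hook's visible return value (the component name is hypothetical; the real call sites are truncated in the diff below):

```tsx
// Hypothetical consumer of the deleted hook, for illustration only.
// The return shape { startListening, stopListening, recognizedText }
// is taken verbatim from the file above.
import useSpeechRecognition from "./hooks/useSpeechRecognition";

export default function TranscriptPreview() {
  const { stopListening, recognizedText } = useSpeechRecognition();

  // startListening(audioData) (not used here) would forward decoded audio
  // to transcriber.start() whenever the transcriber is idle.
  return (
    <div>
      <button onClick={stopListening}>Stop</button>
      <p>{recognizedText}</p>
    </div>
  );
}
```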
app/input.tsx  CHANGED

@@ -1,11 +1,12 @@
-import React, { useState, useEffect, useRef } from 'react';
+import React, { useState, useEffect, useRef, useCallback } from 'react';
 import styles from './page.module.css';
-import useSpeechRecognition from './hooks/useSpeechRecognition';
 import { useMicVAD } from "@ricky0123/vad-react";
 import * as ort from "onnxruntime-web";
 import MicIcon from '@mui/icons-material/Mic';
 import StopIcon from '@mui/icons-material/Stop';
 import { webmFixDuration } from './BlobFix';
+import Progress from './progress';
+import { useTranscriber } from "./hooks/useTranscriber";
 
 ort.env.wasm.wasmPaths = "/_next/static/chunks/";
 
@@ -46,9 +47,20 @@ const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, se
   const streamRef = useRef<MediaStream | null>(null);
   const mediaRecorderRef = useRef<MediaRecorder | null>(null);
   const chunksRef = useRef<Blob[]>([]);
-  const …
+  const [recognizedText, setRecognizedText] = useState('');
+  const transcriber = useTranscriber();
 
-  const …
+  const startListening = useCallback((audioData: any) => {
+    if (!transcriber.isBusy && !transcriber.isModelLoading) {
+      transcriber.start(audioData);
+    }
+  }, [transcriber]);
+
+  useEffect(() => {
+    if (transcriber.output) {
+      setRecognizedText(transcriber.output.text);
+    }
+  }, [transcriber.output, transcriber.isBusy]);
 
   useEffect(() => {
     if (recognizedText) {
@@ -69,7 +81,7 @@ const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, se
     };
 
     processRecording();
-  }, [recordedBlob, startListening]);
+  }, [recording, recordedBlob, startListening]);
 
   const vad = useMicVAD({
     modelURL: "/_next/static/chunks/silero_vad.onnx",
@@ -77,10 +89,7 @@ const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, se
     startOnLoad: false,
     onSpeechEnd: async () => {
       if (recording) {
-        await stopRecording(); // Stop the recording
-
-        console.log('input', input);
-
+        await stopRecording(); // Stop the recording
         setRecording(!recording); // Update the recording state
       }
     },
@@ -94,17 +103,17 @@ const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, se
       mediaRecorderRef.current.stop(); // set state to inactive
       setDuration(0);
       setRecording(false);
-      vad.toggle();
     }
   };
 
   const startRecording = async () => {
     // Reset recording (if any)
     setRecordedBlob(null);
-    vad.toggle();
 
     let startTime = Date.now();
 
+    vad.start();
+
     try {
       if (!streamRef.current) {
         streamRef.current = await navigator.mediaDevices.getUserMedia({
@@ -175,6 +184,21 @@ const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, se
 
   return (
     <div>
+      {transcriber.progressItems.length > 0 && (
+        <div>
+          <label>
+            Loading model files... (only run once)
+          </label>
+          {transcriber.progressItems.map((data) => (
+            <div key={data.file}>
+              <Progress
+                text={data.file}
+                percentage={data.progress}
+              />
+            </div>
+          ))}
+        </div>
+      )}
       <form onSubmit={handleSubmit} className={styles.form}>
         <input
           type="text"
@@ -190,7 +214,7 @@ const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, se
           onClick={handleToggleRecording}
         >
           {recording ? <StopIcon /> : <MicIcon />}
-        </button>
+        </button>
       </div>
     );
   };
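The body of the `processRecording` effect falls between hunks, so only its tail is visible above. A plausible reconstruction of the flow the updated dependency array governs, assuming a standard Web Audio decode step (the decode details and the 16 kHz sample rate are assumptions, not shown in this commit):

```tsx
// Sketch only: of these lines, just `processRecording();` and the updated
// dependency array actually appear in the diff above.
useEffect(() => {
  const processRecording = async () => {
    if (!recordedBlob) return;
    // Assumed step: MediaRecorder emits a compressed webm blob, while
    // transcriber.start() consumes decoded audio, so decode it first.
    const bytes = await recordedBlob.arrayBuffer();
    const audioCtx = new AudioContext({ sampleRate: 16000 });
    const audioData = await audioCtx.decodeAudioData(bytes);
    startListening(audioData);
  };

  processRecording();
}, [recording, recordedBlob, startListening]);
```

Separately, the commit swaps the two `vad.toggle()` calls for one explicit `vad.start()` inside `startRecording`, so the VAD's listening state follows the `recording` flag instead of blindly flipping.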
app/progress.tsx  ADDED

@@ -0,0 +1,19 @@
+export default function Progress({
+  text,
+  percentage,
+}: {
+  text: string;
+  percentage: number;
+}) {
+  percentage = percentage ?? 0;
+  return (
+    <div className='mt-0.5 w-full relative text-sm text-white background-bg-cyan-400 bg-gray-200 border-1 border-gray-400 rounded-lg text-left overflow-hidden'>
+      <div
+        className='top-0 h-full bg-blue-500 whitespace-nowrap px-2'
+        style={{ width: `${percentage}%` }}
+      >
+        {text} ({`${percentage.toFixed(2)}%`})
+      </div>
+    </div>
+  );
+}
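input.tsx above drives this component from `transcriber.progressItems`; in isolation it can be exercised like so (a usage sketch; the component name and values are illustrative):

```tsx
import Progress from "./progress";

// Renders a gray track with a blue bar 42.5% wide, labeled
// "model.onnx (42.50%)".
export default function Demo() {
  return <Progress text="model.onnx" percentage={42.5} />;
}
```

The `percentage = percentage ?? 0` guard presumably covers progress items that arrive before a progress value exists, even though the prop is typed as a required `number`.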