Spaces:
Configuration error
Configuration error
File size: 2,134 Bytes
b995db6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import os
from pathlib import Path
import subprocess
import threading
import httpx
import keyboard
# NOTE: this is a very basic implementation. Not really meant for usage by others.
# Included here in case someone wants to use it as a reference.
# This script will run in the background and listen for a keybind to start recording audio.
# It will then wait until the keybind is pressed again to stop recording.
# The audio file will be sent to the server for transcription.
# The transcription will be copied to the clipboard.
# For a short recording of a couple of sentences, with inference running on a GPU, the response time is very fast (less than 2 seconds). # noqa: E501
# Module-level configuration for the push-to-talk transcription script.

# Not referenced anywhere else in the visible part of this script.
CHUNK = 2**12

# Base ffmpeg invocation: capture from the default ALSA input and encode as WAV.
# The recording loop appends "-y" and the output file path to this list.
# The commented-out entries are alternative options that are currently disabled.
AUDIO_RECORD_CMD = [
    "ffmpeg",
    # "-hide_banner",
    # "-loglevel",
    # "quiet",
    "-f",
    "alsa",
    "-i",
    "default",
    "-f",
    "wav",
    # "-ac",
    # "1",
    # "-ar",
    # "16000",
    # "-f",
    # "s16le",
    # "-acodec",
    # "pcm_s16le",
    # "-",
]

# Program that receives the transcription on stdin and puts it on the clipboard.
COPY_TO_CLIPBOARD_CMD = "wl-copy"

# The client only issues ordinary HTTP POST requests, so the base URL uses the
# "http" scheme rather than a WebSocket ("ws") one.
OPENAI_BASE_URL = "http://localhost:8000/v1"

# Language is sent both as a query parameter and as a form field; derive the
# query string from LANGUAGE so the two cannot drift apart.
LANGUAGE = "en"
TRANSCRIBE_PATH = f"/audio/transcriptions?language={LANGUAGE}"
RESPONSE_FORMAT = "text"

# Account the ffmpeg subprocess is started as (passed as `user=` to Popen).
USER = "nixos"

# Timeout(None) disables all httpx timeouts, so a slow transcription never
# aborts the request.
TIMEOUT = httpx.Timeout(None)

# Hotkey that toggles recording: first press starts, second press stops.
KEYBIND = "ctrl+x"

client = httpx.Client(base_url=OPENAI_BASE_URL, timeout=TIMEOUT)

# Not referenced anywhere else in the visible part of this script.
is_running = threading.Event()

# Recording is written to (and re-read from) this path, overwritten each time.
file = Path("test.wav")  # TODO: use tempfile
# Main loop: each iteration records one utterance between two presses of
# KEYBIND, uploads it for transcription and copies the result to the clipboard.
while True:
    # Block until the hotkey is pressed, then start recording.
    keyboard.wait(KEYBIND)
    print("Action started")
    process = subprocess.Popen(
        # Use the same path that is opened for upload below.
        [*AUDIO_RECORD_CMD, "-y", str(file)],
        # Nothing in this script reads ffmpeg's output. Leaving it on a pipe
        # lets the pipe buffer fill up and stall ffmpeg on longer recordings,
        # so discard it instead.
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        user=USER,
        env=dict(os.environ),
    )

    # Block until the hotkey is pressed again, then stop recording.
    keyboard.wait(KEYBIND)
    # Ask ffmpeg to stop with SIGTERM so it can shut down cleanly and finish
    # writing the file, and wait for it to exit before reading the recording.
    # Fall back to SIGKILL only if it does not exit in time.
    process.terminate()
    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()
    print("Action finished")

    # Upload the recording as multipart form data.
    with open(file, "rb") as f:
        res = client.post(
            OPENAI_BASE_URL + TRANSCRIBE_PATH,
            files={"file": f},
            data={
                "response_format": RESPONSE_FORMAT,
                "language": LANGUAGE,
            },
        )

    # Do not put a server error body on the clipboard as if it were a
    # transcription; report it and go back to waiting for the next recording.
    if res.status_code >= 400:
        print(f"Transcription request failed ({res.status_code}): {res.text}")
        continue

    transcription = res.text
    print(transcription)
    subprocess.run([COPY_TO_CLIPBOARD_CMD], input=transcription.encode(), check=True)
|