File size: 2,583 Bytes
b995db6
 
 
 
93d8861
b995db6
 
 
 
 
 
 
 
 
 
 
 
93d8861
b995db6
 
 
 
93d8861
b995db6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93d8861
 
b995db6
 
 
 
93d8861
b995db6
 
 
 
 
 
 
 
 
93d8861
 
 
 
 
b995db6
93d8861
 
 
 
 
 
 
 
 
 
 
 
 
b995db6
 
 
93d8861
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
from pathlib import Path
import subprocess
import threading
import time

import httpx
import keyboard

# NOTE: this is a very basic implementation. Not really meant for usage by others.
# Included here in case someone wants to use it as a reference.

# This script will run in the background and listen for a keybind to start recording audio.
# It will then wait until the keybind is pressed again to stop recording.
# The audio file will be sent to the server for transcription.
# The transcription will be copied to the clipboard.
# For a short clip of a couple of sentences, with inference running on a GPU, the response time is very fast (less than 2 seconds).  # noqa: E501
# Run this with `sudo -E python scripts/client.py`

# --- Hotkey and recording user ------------------------------------------------
KEYBIND = "ctrl+x"  # pressed once to start a recording and again to stop it
USER = "nixos"  # account the ffmpeg child process is started as

# --- Audio capture ------------------------------------------------------------
CHUNK = 4096  # 2**12
# ffmpeg arguments that capture the "default" ALSA input and encode it as WAV.
# To silence ffmpeg's console output, insert "-loglevel", "quiet" after "-hide_banner".
AUDIO_RECORD_CMD = ["ffmpeg", "-hide_banner", "-f", "alsa", "-i", "default", "-f", "wav"]
# HACK: a fixed file name is used because a temporary file ran into permission issues.
file = Path("test.wav")

# --- Transcription server -----------------------------------------------------
OPENAI_BASE_URL = "ws://localhost:8000/v1"
TRANSCRIBE_PATH = "/audio/transcriptions?language=en"
LANGUAGE = "en"
RESPONSE_FORMAT = "text"
TIMEOUT = httpx.Timeout(None)  # None disables every httpx timeout
client = httpx.Client(timeout=TIMEOUT, base_url=OPENAI_BASE_URL)

# --- Clipboard and shared state -----------------------------------------------
COPY_TO_CLIPBOARD_CMD = "wl-copy"
is_running = threading.Event()


# Seconds to wait for ffmpeg to exit after SIGTERM before falling back to SIGKILL.
FFMPEG_STOP_TIMEOUT = 5.0

# Main loop: every pass records one clip between two presses of KEYBIND, uploads
# it to the transcription endpoint and copies the returned text to the clipboard.
# A failure in any step is reported and the loop goes back to waiting for the
# keybind instead of terminating the listener.
while True:
    keyboard.wait(KEYBIND)
    # Remove the previous recording so that a failed ffmpeg run cannot cause
    # stale audio to be uploaded again.
    file.unlink(missing_ok=True)
    print("Recording started")
    # NOTE(review): both pipes are only read once the recording is stopped; a very
    # long recording could presumably fill the pipe buffer and stall ffmpeg.
    process = subprocess.Popen(
        [*AUDIO_RECORD_CMD, "-y", str(file.name)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        user=USER,
        env=dict(os.environ),
    )
    keyboard.wait(KEYBIND)
    # Stop with SIGTERM first so ffmpeg gets a chance to flush buffered audio and
    # finalize the WAV file; SIGKILL is only the fallback if it does not exit.
    process.terminate()
    try:
        stdout, stderr = process.communicate(timeout=FFMPEG_STOP_TIMEOUT)
    except subprocess.TimeoutExpired:
        process.kill()
        stdout, stderr = process.communicate()
    if stdout or stderr:
        print(f"stdout: {stdout}")
        print(f"stderr: {stderr}")

    try:
        recorded_size = file.stat().st_size
    except FileNotFoundError:
        # ffmpeg never created the output (e.g. it failed to start recording).
        print(f"Recording failed: {file} was not created")
        continue
    print(f"Recording finished. File size: {recorded_size} bytes")

    try:
        with file.open("rb") as fd:
            start = time.perf_counter()
            res = client.post(
                OPENAI_BASE_URL + TRANSCRIBE_PATH,
                files={"file": fd},
                data={
                    "response_format": RESPONSE_FORMAT,
                    "language": LANGUAGE,
                },
            )
        end = time.perf_counter()
        # Without this an error response body would be copied as if it were text.
        res.raise_for_status()
    except httpx.ConnectError as e:
        print(f"Couldn't connect to server: {e}")
        continue
    except httpx.HTTPError as e:
        # Covers error status codes and every other transport-level failure.
        print(f"Transcription request failed: {e}")
        continue

    print(f"Transcription took {end - start} seconds")
    transcription = res.text
    print(transcription)
    try:
        subprocess.run([COPY_TO_CLIPBOARD_CMD], input=transcription.encode(), check=True)
    except (OSError, subprocess.CalledProcessError) as e:
        # The text was already printed above, so a clipboard failure is not fatal.
        print(f"Couldn't copy transcription to clipboard: {e}")