Spaces:
Configuration error
Configuration error
File size: 2,093 Bytes
ec4d8ae 313814b bf48682 313814b ec4d8ae 313814b ec4d8ae 313814b ec4d8ae bf48682 313814b 4bdd7f2 313814b 4bdd7f2 313814b 3a0bd05 313814b 4bdd7f2 313814b ec4d8ae 313814b dc4f25f 313814b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
from __future__ import annotations
import asyncio
import logging
import time
from typing import TYPE_CHECKING
from faster_whisper_server.api_models import TranscriptionSegment, TranscriptionWord
from faster_whisper_server.text_utils import Transcription
if TYPE_CHECKING:
from faster_whisper import transcribe
from faster_whisper_server.audio import Audio
logger = logging.getLogger(__name__)
class FasterWhisperASR:
    """Thin wrapper that runs a Whisper model on audio and returns word-level text.

    The wrapper holds a model plus a set of default transcription options and
    exposes a blocking ``_transcribe`` as well as an awaitable ``transcribe``.
    """

    def __init__(
        self,
        whisper: transcribe.WhisperModel,
        **kwargs,
    ) -> None:
        """Store the model and the options reused for every transcription.

        Args:
            whisper: Model object whose ``transcribe`` method performs inference.
            **kwargs: Extra keyword arguments forwarded unchanged to
                ``whisper.transcribe`` on each call. They must not include
                ``initial_prompt`` or ``word_timestamps``: both are passed
                explicitly by ``_transcribe``, so a duplicate key raises
                ``TypeError`` at call time.
        """
        self.whisper = whisper
        self.transcribe_opts = kwargs

    def _transcribe(
        self,
        audio: Audio,
        prompt: str | None = None,
    ) -> tuple[Transcription, transcribe.TranscriptionInfo]:
        """Transcribe ``audio`` synchronously; this call blocks until inference is done.

        Args:
            audio: Audio to transcribe. Its ``data`` is fed to the model and its
                ``start`` is applied to every resulting word via ``word.offset``.
            prompt: Optional text passed to the model as ``initial_prompt``.

        Returns:
            A ``(transcription, transcription_info)`` tuple, where
            ``transcription`` is built from the extracted words and
            ``transcription_info`` is whatever ``whisper.transcribe`` returned
            alongside the segments.
        """
        start = time.perf_counter()
        # NOTE: should `BatchedInferencePipeline` be used here?
        raw_segments, transcription_info = self.whisper.transcribe(
            audio.data,
            initial_prompt=prompt,
            # Word-level output is needed because the result is assembled from words.
            word_timestamps=True,
            **self.transcribe_opts,
        )
        segments = TranscriptionSegment.from_faster_whisper_segments(raw_segments)
        words = TranscriptionWord.from_segments(segments)
        # Presumably shifts word timings from chunk-relative to the position of
        # this audio in the overall stream -- confirm against `offset`.
        for word in words:
            word.offset(audio.start)
        transcription = Transcription(words)
        end = time.perf_counter()
        # Lazy %-style arguments: the message is only formatted when INFO is
        # enabled, unlike an f-string which is always rendered up front.
        logger.info(
            "Transcribed %s in %.2f seconds. Prompt: %s. Transcription: %s",
            audio,
            end - start,
            prompt,
            transcription.text,
        )
        return (transcription, transcription_info)

    async def transcribe(
        self,
        audio: Audio,
        prompt: str | None = None,
    ) -> tuple[Transcription, transcribe.TranscriptionInfo]:
        """Wrapper around _transcribe so it can be used in async context.

        The blocking call is handed to the running loop's default executor
        (``executor=None``) so the event loop is not blocked during inference.

        Args:
            audio: Audio to transcribe, forwarded to ``_transcribe``.
            prompt: Optional initial prompt, forwarded to ``_transcribe``.

        Returns:
            The ``(transcription, transcription_info)`` tuple produced by
            ``_transcribe``.
        """
        # is this the optimal way to execute a blocking call in an async context?
        # TODO: verify performance when running inference on a CPU
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            self._transcribe,
            audio,
            prompt,
        )
|