Spaces:
Configuration error
Configuration error
File size: 1,605 Bytes
db7bf9a 313814b db7bf9a 313814b db7bf9a 313814b db7bf9a 313814b db7bf9a 313814b db7bf9a 313814b db7bf9a 313814b db7bf9a 313814b db7bf9a 313814b db7bf9a 313814b db7bf9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
from __future__ import annotations
import enum
from faster_whisper.transcribe import Segment, Word
from pydantic import BaseModel
from speaches.core import Transcription
# https://platform.openai.com/docs/api-reference/audio/createTranscription#audio-createtranscription-response_format
class ResponseFormat(enum.StrEnum):
TEXT = "text"
JSON = "json"
VERBOSE_JSON = "verbose_json"
# VTT = "vtt"
# SRT = "srt"
# https://platform.openai.com/docs/api-reference/audio/json-object
class TranscriptionJsonResponse(BaseModel):
    """Response model that carries only the transcribed text."""

    text: str

    @classmethod
    def from_transcription(
        cls, transcription: Transcription
    ) -> TranscriptionJsonResponse:
        """Build a response whose ``text`` is copied from ``transcription.text``."""
        transcribed_text = transcription.text
        return cls(text=transcribed_text)
# https://platform.openai.com/docs/api-reference/audio/verbose-json-object
class TranscriptionVerboseJsonResponse(BaseModel):
    """Response model with text, language, duration, words and segments."""

    task: str = "transcribe"
    language: str
    duration: float
    text: str
    words: list[Word]
    segments: list[Segment]

    @classmethod
    def from_transcription(
        cls,
        transcription: Transcription,
        *,
        language: str = "english",
        segments: list[Segment] | None = None,
    ) -> TranscriptionVerboseJsonResponse:
        """Build a verbose response from ``transcription``.

        Args:
            transcription: Source of ``duration``, ``text`` and ``words``.
                Each item of ``words`` must expose ``start``, ``end``,
                ``text`` and ``probability``.
            language: Value reported in the ``language`` field. Defaults to
                ``"english"``, the value that used to be hard-coded here.
                NOTE(review): callers that know the detected language should
                pass it explicitly.
            segments: Segments to include in the response. ``None`` (the
                default) yields an empty list, matching the previous
                hard-coded behaviour.

        Returns:
            A populated ``TranscriptionVerboseJsonResponse``.
        """
        return cls(
            language=language,
            duration=transcription.duration,
            text=transcription.text,
            words=[
                # The source word keeps its text under ``text``, while
                # ``Word`` takes it via the ``word`` keyword.
                Word(
                    start=word.start,
                    end=word.end,
                    word=word.text,
                    probability=word.probability,
                )
                for word in transcription.words
            ],
            # Build a fresh list per call rather than sharing a default.
            segments=[] if segments is None else segments,
        )
|