Spaces:
Configuration error
Configuration error
from __future__ import annotations | |
from typing import TYPE_CHECKING, Literal | |
from pydantic import BaseModel, ConfigDict, Field | |
from faster_whisper_server.core import Segment, Transcription, Word, segments_to_text | |
if TYPE_CHECKING: | |
from faster_whisper.transcribe import TranscriptionInfo | |
# https://platform.openai.com/docs/api-reference/audio/json-object
class TranscriptionJsonResponse(BaseModel):
    """Minimal transcription response that carries only the transcribed text."""

    text: str

    @classmethod
    def from_segments(cls, segments: list[Segment]) -> TranscriptionJsonResponse:
        """Build a response whose ``text`` is ``segments_to_text(segments)``.

        Args:
            segments: Segments to be joined into a single text.

        Returns:
            A new response instance.
        """
        return cls(text=segments_to_text(segments))

    @classmethod
    def from_transcription(cls, transcription: Transcription) -> TranscriptionJsonResponse:
        """Build a response from ``transcription.text``.

        Args:
            transcription: Transcription whose ``text`` attribute is copied.

        Returns:
            A new response instance.
        """
        return cls(text=transcription.text)
# https://platform.openai.com/docs/api-reference/audio/verbose-json-object
class TranscriptionVerboseJsonResponse(BaseModel):
    """Detailed transcription response: text plus language, duration, words and segments."""

    task: str = "transcribe"
    language: str
    duration: float
    text: str
    words: list[Word]
    segments: list[Segment]

    @classmethod
    def from_segment(cls, segment: Segment, transcription_info: TranscriptionInfo) -> TranscriptionVerboseJsonResponse:
        """Build a response describing a single segment.

        Args:
            segment: The segment to wrap; it becomes the only entry of ``segments``.
            transcription_info: Source of the ``language`` value.

        Returns:
            A new response whose ``duration`` is ``segment.end - segment.start``.
        """
        # ``segment.words`` is only used when it is actually a list; any other
        # value is replaced by an empty list so that ``words`` stays a list.
        words = segment.words if isinstance(segment.words, list) else []
        return cls(
            language=transcription_info.language,
            duration=segment.end - segment.start,
            text=segment.text,
            words=words,
            segments=[segment],
        )

    @classmethod
    def from_segments(
        cls, segments: list[Segment], transcription_info: TranscriptionInfo
    ) -> TranscriptionVerboseJsonResponse:
        """Build a response covering all of ``segments``.

        Args:
            segments: Segments to include; also the source of ``text`` and ``words``.
            transcription_info: Source of the ``language`` and ``duration`` values.

        Returns:
            A new response instance.
        """
        return cls(
            language=transcription_info.language,
            duration=transcription_info.duration,
            text=segments_to_text(segments),
            segments=segments,
            words=Word.from_segments(segments),
        )

    @classmethod
    def from_transcription(cls, transcription: Transcription) -> TranscriptionVerboseJsonResponse:
        """Build a response from a ``Transcription``.

        Known limitations: ``language`` is always ``"english"`` and ``segments``
        is always empty, regardless of the transcription's content.

        Args:
            transcription: Source of ``duration``, ``text`` and ``words``.

        Returns:
            A new response instance.
        """
        return cls(
            language="english",  # FIX: hardcoded
            duration=transcription.duration,
            text=transcription.text,
            words=transcription.words,
            segments=[],  # FIX: hardcoded
        )
class ModelListResponse(BaseModel):
    """Envelope for a list of models, tagged with a constant ``"list"`` object type."""

    # The listed models.
    data: list[ModelObject]
    # Type tag; the only accepted value is "list", which is also the default.
    object: Literal["list"] = "list"
class ModelObject(BaseModel):
    """Description of a single model as exposed by the API."""

    # Configuration is declared first so the schema examples sit next to the
    # class header; its position in the class body does not affect the fields.
    model_config = ConfigDict(
        populate_by_name=True,
        json_schema_extra={
            "examples": [
                {
                    "id": "Systran/faster-whisper-large-v3",
                    "created": 1700732060,
                    "object": "model",
                    "owned_by": "Systran",
                },
                {
                    "id": "Systran/faster-distil-whisper-large-v3",
                    "created": 1711378296,
                    "object": "model",
                    "owned_by": "Systran",
                },
                {
                    "id": "bofenghuang/whisper-large-v2-cv11-french-ct2",
                    "created": 1687968011,
                    "object": "model",
                    "owned_by": "bofenghuang",
                },
            ]
        },
    )

    id: str
    """The model identifier, which can be referenced in the API endpoints."""
    created: int
    """The Unix timestamp (in seconds) when the model was created."""
    # Stored as ``object_`` on the model, serialized under the key "object".
    object_: Literal["model"] = Field(serialization_alias="object")
    """The object type, which is always "model"."""
    owned_by: str
    """The organization that owns the model."""
    language: list[str] = Field(default_factory=list)
    """List of ISO 639-3 supported by the model. It's possible that the list will be empty. This field is not a part of the OpenAI API spec and is added for convenience."""  # noqa: E501