Spaces:

fedirz
/

faster-whisper-server

Configuration error

faster-whisper-server / faster_whisper_server /server_models.py

Fedir Zadniprovskyi

refactor

2a79f48 7 months ago

3.75 kB

	from __future__ import annotations

	from typing import TYPE_CHECKING, Literal

	from pydantic import BaseModel, ConfigDict, Field

	from faster_whisper_server.core import Segment, Transcription, Word, segments_to_text

	if TYPE_CHECKING:
	from faster_whisper.transcribe import TranscriptionInfo


	# https://platform.openai.com/docs/api-reference/audio/json-object
	class TranscriptionJsonResponse(BaseModel):
	    # Minimal transcription response: carries only the transcribed text.
	    text: str

	    @classmethod
	    def from_segments(cls, segments: list[Segment]) -> TranscriptionJsonResponse:
	        """Build a response whose text is `segments_to_text(segments)`."""
	        combined_text = segments_to_text(segments)
	        return cls(text=combined_text)

	    @classmethod
	    def from_transcription(cls, transcription: Transcription) -> TranscriptionJsonResponse:
	        """Build a response from the `text` attribute of `transcription`."""
	        full_text = transcription.text
	        return cls(text=full_text)


	# https://platform.openai.com/docs/api-reference/audio/verbose-json-object
	class TranscriptionVerboseJsonResponse(BaseModel):
	    # Verbose transcription response: the text plus task, language, duration,
	    # word list and segment list.
	    task: str = "transcribe"
	    language: str
	    duration: float
	    text: str
	    words: list[Word]
	    segments: list[Segment]

	    @classmethod
	    def from_segment(
	        cls, segment: Segment, transcription_info: TranscriptionInfo
	    ) -> TranscriptionVerboseJsonResponse:
	        """Build a response describing a single segment.

	        The duration is the segment's own span (`end - start`); `words` falls
	        back to an empty list when `segment.words` is not a list.
	        """
	        detected_language = transcription_info.language
	        span = segment.end - segment.start
	        segment_text = segment.text
	        if isinstance(segment.words, list):
	            segment_words = segment.words
	        else:
	            segment_words = []
	        return cls(
	            language=detected_language,
	            duration=span,
	            text=segment_text,
	            words=segment_words,
	            segments=[segment],
	        )

	    @classmethod
	    def from_segments(
	        cls, segments: list[Segment], transcription_info: TranscriptionInfo
	    ) -> TranscriptionVerboseJsonResponse:
	        """Build a response covering all of `segments`.

	        Language and duration come from `transcription_info`; the text is
	        `segments_to_text(segments)` and the words are `Word.from_segments(segments)`.
	        """
	        detected_language = transcription_info.language
	        total_duration = transcription_info.duration
	        combined_text = segments_to_text(segments)
	        all_words = Word.from_segments(segments)
	        return cls(
	            language=detected_language,
	            duration=total_duration,
	            text=combined_text,
	            segments=segments,
	            words=all_words,
	        )

	    @classmethod
	    def from_transcription(cls, transcription: Transcription) -> TranscriptionVerboseJsonResponse:
	        """Build a response from the duration, text and words of `transcription`.

	        The language is always reported as "english" and the segment list is
	        always empty; both are placeholders still to be fixed.
	        """
	        total_duration = transcription.duration
	        full_text = transcription.text
	        all_words = transcription.words
	        return cls(
	            language="english",  # FIX: hardcoded
	            duration=total_duration,
	            text=full_text,
	            words=all_words,
	            segments=[],  # FIX: hardcoded
	        )


	class ModelListResponse(BaseModel):
	    # Envelope for a list of ModelObject entries.
	    data: list[ModelObject]
	    # Fixed discriminator: the only accepted value, and the default, is "list".
	    object: Literal["list"] = Field(default="list")


	class ModelObject(BaseModel):
	    # One model entry. `object_` is serialized under the key "object", and
	    # `populate_by_name=True` is enabled in the model config.
	    model_config = ConfigDict(
	        populate_by_name=True,
	        json_schema_extra={
	            # Each row below expands to one example object in the JSON schema.
	            "examples": [
	                {"id": model_id, "created": created_at, "object": "model", "owned_by": owner}
	                for model_id, created_at, owner in (
	                    ("Systran/faster-whisper-large-v3", 1700732060, "Systran"),
	                    ("Systran/faster-distil-whisper-large-v3", 1711378296, "Systran"),
	                    ("bofenghuang/whisper-large-v2-cv11-french-ct2", 1687968011, "bofenghuang"),
	                )
	            ]
	        },
	    )

	    id: str
	    """The model identifier, which can be referenced in the API endpoints."""
	    created: int
	    """The Unix timestamp (in seconds) when the model was created."""
	    object_: Literal["model"] = Field(serialization_alias="object")
	    """The object type, which is always "model"."""
	    owned_by: str
	    """The organization that owns the model."""
	    language: list[str] = Field(default_factory=list)
	    """List of ISO 639-3 supported by the model. It's possible that the list will be empty. This field is not a part of the OpenAI API spec and is added for convenience."""  # noqa: E501