Spaces:
Configuration error
Configuration error
File size: 3,741 Bytes
db7bf9a 4bdd7f2 313814b 39ee116 313814b db7bf9a 313814b 4bdd7f2 db7bf9a 313814b 4bdd7f2 db7bf9a 313814b db7bf9a 313814b 4bdd7f2 e01d72d d0feed8 e01d72d 4bdd7f2 db7bf9a 4bdd7f2 db7bf9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
from __future__ import annotations
from faster_whisper.transcribe import Segment, TranscriptionInfo, Word
from pydantic import BaseModel
from faster_whisper_server import utils
from faster_whisper_server.core import Transcription
# https://platform.openai.com/docs/api-reference/audio/json-object
class TranscriptionJsonResponse(BaseModel):
    # Plain JSON transcription response: carries only the transcribed text.
    text: str

    @classmethod
    def from_segments(cls, segments: list[Segment]) -> TranscriptionJsonResponse:
        """Build a response whose text is ``utils.segments_text(segments)``."""
        combined_text = utils.segments_text(segments)
        return cls(text=combined_text)

    @classmethod
    def from_transcription(
        cls, transcription: Transcription
    ) -> TranscriptionJsonResponse:
        """Build a response from the ``text`` attribute of *transcription*."""
        payload = {"text": transcription.text}
        return cls(**payload)
class WordObject(BaseModel):
    # A single word together with its start/end values and its probability.
    start: float
    end: float
    word: str
    probability: float

    @classmethod
    def from_word(cls, word: Word) -> WordObject:
        """Create a ``WordObject`` by copying the same-named attributes of *word*.

        The attributes read are ``start``, ``end``, ``word`` and
        ``probability``; each is passed to the model under the same name.
        """
        copied_attrs = ("start", "end", "word", "probability")
        values = {attr: getattr(word, attr) for attr in copied_attrs}
        return cls(**values)
class SegmentObject(BaseModel):
    # One transcription segment; every field mirrors the same-named attribute
    # of the ``Segment`` it is built from (see ``from_segment``).
    id: int
    seek: int
    start: float
    end: float
    text: str
    tokens: list[int]
    temperature: float
    avg_logprob: float
    compression_ratio: float
    no_speech_prob: float

    @classmethod
    def from_segment(cls, segment: Segment) -> SegmentObject:
        """Create a ``SegmentObject`` by copying attributes one-to-one from *segment*.

        Every model field is filled from the attribute of *segment* that has
        the same name; nothing is renamed or recomputed here.
        """
        mirrored_attrs = (
            "id",
            "seek",
            "start",
            "end",
            "text",
            "tokens",
            "temperature",
            "avg_logprob",
            "compression_ratio",
            "no_speech_prob",
        )
        values = {attr: getattr(segment, attr) for attr in mirrored_attrs}
        return cls(**values)
# https://platform.openai.com/docs/api-reference/audio/verbose-json-object
class TranscriptionVerboseJsonResponse(BaseModel):
    # Verbose transcription response: language, duration and full text plus
    # word-level and segment-level detail. ``task`` defaults to "transcribe".
    task: str = "transcribe"
    language: str
    duration: float
    text: str
    words: list[WordObject]
    segments: list[SegmentObject]

    @classmethod
    def from_segment(
        cls, segment: Segment, transcription_info: TranscriptionInfo
    ) -> TranscriptionVerboseJsonResponse:
        """Build a response that describes a single segment.

        Args:
            segment: The segment supplying the text, words and timing.
            transcription_info: Only its ``language`` is used here.

        Returns:
            A response whose ``duration`` is the span of this segment
            (``segment.end - segment.start``) and whose ``segments`` list
            contains exactly this one segment.
        """
        # ``segment.words`` is only used when it is a list; any other value
        # results in an empty word list.
        if isinstance(segment.words, list):
            words = [WordObject.from_word(word) for word in segment.words]
        else:
            words = []
        return cls(
            language=transcription_info.language,
            duration=segment.end - segment.start,
            text=segment.text,
            words=words,
            segments=[SegmentObject.from_segment(segment)],
        )

    @classmethod
    def from_segments(
        cls, segments: list[Segment], transcription_info: TranscriptionInfo
    ) -> TranscriptionVerboseJsonResponse:
        """Build a response that covers a list of segments.

        Args:
            segments: The segments to report; their combined text comes from
                ``utils.segments_text`` and their words from
                ``utils.words_from_segments``.
            transcription_info: Supplies ``language`` and ``duration``.

        Returns:
            A response containing one ``SegmentObject`` per input segment.
        """
        return cls(
            language=transcription_info.language,
            duration=transcription_info.duration,
            text=utils.segments_text(segments),
            segments=[SegmentObject.from_segment(segment) for segment in segments],
            words=[
                WordObject.from_word(word)
                for word in utils.words_from_segments(segments)
            ],
        )

    @classmethod
    def from_transcription(
        cls,
        transcription: Transcription,
        *,
        language: str = "english",
    ) -> TranscriptionVerboseJsonResponse:
        """Build a response from a ``Transcription``.

        Args:
            transcription: Supplies ``duration``, ``text`` and ``words``.
            language: Value reported in the ``language`` field. It defaults to
                ``"english"``, the value this method previously hard-coded, so
                existing callers are unaffected; callers that know the actual
                language should pass it explicitly.

        Returns:
            A response with word-level detail. ``segments`` is always empty
            because this method does not derive segment data from
            *transcription*.
        """
        return cls(
            language=language,
            duration=transcription.duration,
            text=transcription.text,
            words=[
                # These words expose their text as ``.text`` (not ``.word``),
                # so ``WordObject.from_word`` cannot be reused here.
                WordObject(
                    start=word.start,
                    end=word.end,
                    word=word.text,
                    probability=word.probability,
                )
                for word in transcription.words
            ],
            segments=[],  # TODO: still unpopulated; no segment source is used here
        )
|