Fedir Zadniprovskyi committed on
Commit
2a79f48
·
1 Parent(s): 125092f
faster_whisper_server/asr.py CHANGED
@@ -1,11 +1,10 @@
1
  import asyncio
2
- from collections.abc import Iterable
3
  import time
4
 
5
  from faster_whisper import transcribe
6
 
7
  from faster_whisper_server.audio import Audio
8
- from faster_whisper_server.core import Transcription, Word
9
  from faster_whisper_server.logger import logger
10
 
11
 
@@ -30,7 +29,8 @@ class FasterWhisperASR:
30
  word_timestamps=True,
31
  **self.transcribe_opts,
32
  )
33
- words = words_from_whisper_segments(segments)
 
34
  for word in words:
35
  word.offset(audio.start)
36
  transcription = Transcription(words)
@@ -54,19 +54,3 @@ class FasterWhisperASR:
54
  audio,
55
  prompt,
56
  )
57
-
58
-
59
- def words_from_whisper_segments(segments: Iterable[transcribe.Segment]) -> list[Word]:
60
- words: list[Word] = []
61
- for segment in segments:
62
- assert segment.words is not None
63
- words.extend(
64
- Word(
65
- start=word.start,
66
- end=word.end,
67
- text=word.word,
68
- probability=word.probability,
69
- )
70
- for word in segment.words
71
- )
72
- return words
 
1
  import asyncio
 
2
  import time
3
 
4
  from faster_whisper import transcribe
5
 
6
  from faster_whisper_server.audio import Audio
7
+ from faster_whisper_server.core import Segment, Transcription, Word
8
  from faster_whisper_server.logger import logger
9
 
10
 
 
29
  word_timestamps=True,
30
  **self.transcribe_opts,
31
  )
32
+ segments = Segment.from_faster_whisper_segments(segments)
33
+ words = Word.from_segments(segments)
34
  for word in words:
35
  word.offset(audio.start)
36
  transcription = Transcription(words)
 
54
  audio,
55
  prompt,
56
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
faster_whisper_server/core.py CHANGED
@@ -1,43 +1,85 @@
1
- # TODO: rename module
2
  from __future__ import annotations
3
 
4
- from dataclasses import dataclass
5
  import re
 
 
 
6
 
7
  from faster_whisper_server.config import config
8
 
 
 
9
 
10
- # TODO: use the `Segment` from `faster-whisper.transcribe` instead
11
- @dataclass
12
- class Segment:
13
- text: str
14
- start: float = 0.0
15
- end: float = 0.0
16
 
17
- @property
18
- def is_eos(self) -> bool:
19
- if self.text.endswith("..."):
20
- return False
21
- return any(self.text.endswith(punctuation_symbol) for punctuation_symbol in ".?!")
 
 
 
 
 
 
 
 
 
22
 
23
  def offset(self, seconds: float) -> None:
24
  self.start += seconds
25
  self.end += seconds
26
 
27
-
28
- # TODO: use the `Word` from `faster-whisper.transcribe` instead
29
- @dataclass
30
- class Word(Segment):
31
- probability: float = 0.0
32
-
33
  @classmethod
34
  def common_prefix(cls, a: list[Word], b: list[Word]) -> list[Word]:
35
  i = 0
36
- while i < len(a) and i < len(b) and canonicalize_word(a[i].text) == canonicalize_word(b[i].text):
37
  i += 1
38
  return a[:i]
39
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  class Transcription:
42
  def __init__(self, words: list[Word] = []) -> None:
43
  self.words: list[Word] = []
@@ -45,7 +87,7 @@ class Transcription:
45
 
46
  @property
47
  def text(self) -> str:
48
- return " ".join(word.text for word in self.words).strip()
49
 
50
  @property
51
  def start(self) -> float:
@@ -77,48 +119,57 @@ class Transcription:
77
  raise ValueError(f"Words overlap: {words[i - 1]} and {words[i]}. All words: {words}")
78
 
79
 
80
- def test_segment_is_eos() -> None:
81
- assert not Segment("Hello").is_eos
82
- assert not Segment("Hello...").is_eos
83
- assert Segment("Hello.").is_eos
84
- assert Segment("Hello!").is_eos
85
- assert Segment("Hello?").is_eos
86
- assert not Segment("Hello. Yo").is_eos
87
- assert not Segment("Hello. Yo...").is_eos
88
- assert Segment("Hello. Yo.").is_eos
 
 
 
 
 
 
89
 
90
 
91
- def to_full_sentences(words: list[Word]) -> list[Segment]:
92
- sentences: list[Segment] = [Segment("")]
93
  for word in words:
94
- sentences[-1] = Segment(
95
- start=sentences[-1].start,
96
- end=word.end,
97
- text=sentences[-1].text + word.text,
98
- )
99
- if word.is_eos:
100
- sentences.append(Segment(""))
101
- if len(sentences) > 0 and not sentences[-1].is_eos:
102
  sentences.pop()
103
  return sentences
104
 
105
 
106
  def tests_to_full_sentences() -> None:
 
 
 
107
  assert to_full_sentences([]) == []
108
- assert to_full_sentences([Word(text="Hello")]) == []
109
- assert to_full_sentences([Word(text="Hello..."), Word(" world")]) == []
110
- assert to_full_sentences([Word(text="Hello..."), Word(" world.")]) == [Segment(text="Hello... world.")]
111
- assert to_full_sentences([Word(text="Hello..."), Word(" world."), Word(" How")]) == [
112
- Segment(text="Hello... world.")
113
  ]
114
 
115
 
116
- def to_text(words: list[Word]) -> str:
117
- return "".join(word.text for word in words)
 
 
 
 
118
 
119
 
120
- def to_text_w_ts(words: list[Word]) -> str:
121
- return "".join(f"{word.text}({word.start:.2f}-{word.end:.2f})" for word in words)
122
 
123
 
124
  def canonicalize_word(text: str) -> str:
@@ -136,14 +187,14 @@ def test_canonicalize_word() -> None:
136
 
137
  def common_prefix(a: list[Word], b: list[Word]) -> list[Word]:
138
  i = 0
139
- while i < len(a) and i < len(b) and canonicalize_word(a[i].text) == canonicalize_word(b[i].text):
140
  i += 1
141
  return a[:i]
142
 
143
 
144
  def test_common_prefix() -> None:
145
  def word(text: str) -> Word:
146
- return Word(text=text, start=0.0, end=0.0, probability=0.0)
147
 
148
  a = [word("a"), word("b"), word("c")]
149
  b = [word("a"), word("b"), word("c")]
@@ -176,7 +227,7 @@ def test_common_prefix() -> None:
176
 
177
  def test_common_prefix_and_canonicalization() -> None:
178
  def word(text: str) -> Word:
179
- return Word(text=text, start=0.0, end=0.0, probability=0.0)
180
 
181
  a = [word("A...")]
182
  b = [word("a?"), word("b"), word("c")]
 
 
1
  from __future__ import annotations
2
 
 
3
  import re
4
+ from typing import TYPE_CHECKING
5
+
6
+ from pydantic import BaseModel
7
 
8
  from faster_whisper_server.config import config
9
 
10
+ if TYPE_CHECKING:
11
+ from collections.abc import Iterable
12
 
13
+ import faster_whisper.transcribe
 
 
 
 
 
14
 
15
+
16
+ class Word(BaseModel):
17
+ start: float
18
+ end: float
19
+ word: str
20
+ probability: float
21
+
22
+ @classmethod
23
+ def from_segments(cls, segments: Iterable[Segment]) -> list[Word]:
24
+ words: list[Word] = []
25
+ for segment in segments:
26
+ assert segment.words is not None
27
+ words.extend(segment.words)
28
+ return words
29
 
30
  def offset(self, seconds: float) -> None:
31
  self.start += seconds
32
  self.end += seconds
33
 
 
 
 
 
 
 
34
  @classmethod
35
  def common_prefix(cls, a: list[Word], b: list[Word]) -> list[Word]:
36
  i = 0
37
+ while i < len(a) and i < len(b) and canonicalize_word(a[i].word) == canonicalize_word(b[i].word):
38
  i += 1
39
  return a[:i]
40
 
41
 
42
+ class Segment(BaseModel):
43
+ id: int
44
+ seek: int
45
+ start: float
46
+ end: float
47
+ text: str
48
+ tokens: list[int]
49
+ temperature: float
50
+ avg_logprob: float
51
+ compression_ratio: float
52
+ no_speech_prob: float
53
+ words: list[Word] | None
54
+
55
+ @classmethod
56
+ def from_faster_whisper_segments(cls, segments: Iterable[faster_whisper.transcribe.Segment]) -> Iterable[Segment]:
57
+ for segment in segments:
58
+ yield cls(
59
+ id=segment.id,
60
+ seek=segment.seek,
61
+ start=segment.start,
62
+ end=segment.end,
63
+ text=segment.text,
64
+ tokens=segment.tokens,
65
+ temperature=segment.temperature,
66
+ avg_logprob=segment.avg_logprob,
67
+ compression_ratio=segment.compression_ratio,
68
+ no_speech_prob=segment.no_speech_prob,
69
+ words=[
70
+ Word(
71
+ start=word.start,
72
+ end=word.end,
73
+ word=word.word,
74
+ probability=word.probability,
75
+ )
76
+ for word in segment.words
77
+ ]
78
+ if segment.words is not None
79
+ else None,
80
+ )
81
+
82
+
83
  class Transcription:
84
  def __init__(self, words: list[Word] = []) -> None:
85
  self.words: list[Word] = []
 
87
 
88
  @property
89
  def text(self) -> str:
90
+ return " ".join(word.word for word in self.words).strip()
91
 
92
  @property
93
  def start(self) -> float:
 
119
  raise ValueError(f"Words overlap: {words[i - 1]} and {words[i]}. All words: {words}")
120
 
121
 
122
+ def is_eos(text: str) -> bool:
123
+ if text.endswith("..."):
124
+ return False
125
+ return any(text.endswith(punctuation_symbol) for punctuation_symbol in ".?!")
126
+
127
+
128
+ def test_is_eos() -> None:
129
+ assert not is_eos("Hello")
130
+ assert not is_eos("Hello...")
131
+ assert is_eos("Hello.")
132
+ assert is_eos("Hello!")
133
+ assert is_eos("Hello?")
134
+ assert not is_eos("Hello. Yo")
135
+ assert not is_eos("Hello. Yo...")
136
+ assert is_eos("Hello. Yo.")
137
 
138
 
139
+ def to_full_sentences(words: list[Word]) -> list[list[Word]]:
140
+ sentences: list[list[Word]] = [[]]
141
  for word in words:
142
+ sentences[-1].append(word)
143
+ if is_eos(word.word):
144
+ sentences.append([])
145
+ if len(sentences[-1]) == 0 or not is_eos(sentences[-1][-1].word):
 
 
 
 
146
  sentences.pop()
147
  return sentences
148
 
149
 
150
  def tests_to_full_sentences() -> None:
151
+ def word(text: str) -> Word:
152
+ return Word(word=text, start=0.0, end=0.0, probability=0.0)
153
+
154
  assert to_full_sentences([]) == []
155
+ assert to_full_sentences([word(text="Hello")]) == []
156
+ assert to_full_sentences([word(text="Hello..."), word(" world")]) == []
157
+ assert to_full_sentences([word(text="Hello..."), word(" world.")]) == [[word("Hello..."), word(" world.")]]
158
+ assert to_full_sentences([word(text="Hello..."), word(" world."), word(" How")]) == [
159
+ [word("Hello..."), word(" world.")],
160
  ]
161
 
162
 
163
+ def word_to_text(words: list[Word]) -> str:
164
+ return "".join(word.word for word in words)
165
+
166
+
167
+ def words_to_text_w_ts(words: list[Word]) -> str:
168
+ return "".join(f"{word.word}({word.start:.2f}-{word.end:.2f})" for word in words)
169
 
170
 
171
+ def segments_to_text(segments: Iterable[Segment]) -> str:
172
+ return "".join(segment.text for segment in segments).strip()
173
 
174
 
175
  def canonicalize_word(text: str) -> str:
 
187
 
188
  def common_prefix(a: list[Word], b: list[Word]) -> list[Word]:
189
  i = 0
190
+ while i < len(a) and i < len(b) and canonicalize_word(a[i].word) == canonicalize_word(b[i].word):
191
  i += 1
192
  return a[:i]
193
 
194
 
195
  def test_common_prefix() -> None:
196
  def word(text: str) -> Word:
197
+ return Word(word=text, start=0.0, end=0.0, probability=0.0)
198
 
199
  a = [word("a"), word("b"), word("c")]
200
  b = [word("a"), word("b"), word("c")]
 
227
 
228
  def test_common_prefix_and_canonicalization() -> None:
229
  def word(text: str) -> Word:
230
+ return Word(word=text, start=0.0, end=0.0, probability=0.0)
231
 
232
  a = [word("A...")]
233
  b = [word("a?"), word("b"), word("c")]
faster_whisper_server/main.py CHANGED
@@ -24,7 +24,6 @@ from faster_whisper.vad import VadOptions, get_speech_timestamps
24
  import huggingface_hub
25
  from pydantic import AfterValidator
26
 
27
- from faster_whisper_server import utils
28
  from faster_whisper_server.asr import FasterWhisperASR
29
  from faster_whisper_server.audio import AudioStream, audio_samples_from_file
30
  from faster_whisper_server.config import (
@@ -34,6 +33,7 @@ from faster_whisper_server.config import (
34
  Task,
35
  config,
36
  )
 
37
  from faster_whisper_server.logger import logger
38
  from faster_whisper_server.server_models import (
39
  ModelListResponse,
@@ -46,7 +46,7 @@ from faster_whisper_server.transcriber import audio_transcriber
46
  if TYPE_CHECKING:
47
  from collections.abc import Generator, Iterable
48
 
49
- from faster_whisper.transcribe import Segment, TranscriptionInfo
50
  from huggingface_hub.hf_api import ModelInfo
51
 
52
  loaded_models: OrderedDict[str, WhisperModel] = OrderedDict()
@@ -157,7 +157,7 @@ def segments_to_response(
157
  ) -> str | TranscriptionJsonResponse | TranscriptionVerboseJsonResponse:
158
  segments = list(segments)
159
  if response_format == ResponseFormat.TEXT: # noqa: RET503
160
- return utils.segments_text(segments)
161
  elif response_format == ResponseFormat.JSON:
162
  return TranscriptionJsonResponse.from_segments(segments)
163
  elif response_format == ResponseFormat.VERBOSE_JSON:
@@ -220,6 +220,7 @@ def translate_file(
220
  temperature=temperature,
221
  vad_filter=True,
222
  )
 
223
 
224
  if stream:
225
  return segments_to_streaming_response(segments, transcription_info, response_format)
@@ -258,6 +259,7 @@ def transcribe_file(
258
  vad_filter=True,
259
  hotwords=hotwords,
260
  )
 
261
 
262
  if stream:
263
  return segments_to_streaming_response(segments, transcription_info, response_format)
 
24
  import huggingface_hub
25
  from pydantic import AfterValidator
26
 
 
27
  from faster_whisper_server.asr import FasterWhisperASR
28
  from faster_whisper_server.audio import AudioStream, audio_samples_from_file
29
  from faster_whisper_server.config import (
 
33
  Task,
34
  config,
35
  )
36
+ from faster_whisper_server.core import Segment, segments_to_text
37
  from faster_whisper_server.logger import logger
38
  from faster_whisper_server.server_models import (
39
  ModelListResponse,
 
46
  if TYPE_CHECKING:
47
  from collections.abc import Generator, Iterable
48
 
49
+ from faster_whisper.transcribe import TranscriptionInfo
50
  from huggingface_hub.hf_api import ModelInfo
51
 
52
  loaded_models: OrderedDict[str, WhisperModel] = OrderedDict()
 
157
  ) -> str | TranscriptionJsonResponse | TranscriptionVerboseJsonResponse:
158
  segments = list(segments)
159
  if response_format == ResponseFormat.TEXT: # noqa: RET503
160
+ return segments_to_text(segments)
161
  elif response_format == ResponseFormat.JSON:
162
  return TranscriptionJsonResponse.from_segments(segments)
163
  elif response_format == ResponseFormat.VERBOSE_JSON:
 
220
  temperature=temperature,
221
  vad_filter=True,
222
  )
223
+ segments = Segment.from_faster_whisper_segments(segments)
224
 
225
  if stream:
226
  return segments_to_streaming_response(segments, transcription_info, response_format)
 
259
  vad_filter=True,
260
  hotwords=hotwords,
261
  )
262
+ segments = Segment.from_faster_whisper_segments(segments)
263
 
264
  if stream:
265
  return segments_to_streaming_response(segments, transcription_info, response_format)
faster_whisper_server/server_models.py CHANGED
@@ -4,12 +4,10 @@ from typing import TYPE_CHECKING, Literal
4
 
5
  from pydantic import BaseModel, ConfigDict, Field
6
 
7
- from faster_whisper_server import utils
8
 
9
  if TYPE_CHECKING:
10
- from faster_whisper.transcribe import Segment, TranscriptionInfo, Word
11
-
12
- from faster_whisper_server.core import Transcription
13
 
14
 
15
  # https://platform.openai.com/docs/api-reference/audio/json-object
@@ -18,65 +16,21 @@ class TranscriptionJsonResponse(BaseModel):
18
 
19
  @classmethod
20
  def from_segments(cls, segments: list[Segment]) -> TranscriptionJsonResponse:
21
- return cls(text=utils.segments_text(segments))
22
 
23
  @classmethod
24
  def from_transcription(cls, transcription: Transcription) -> TranscriptionJsonResponse:
25
  return cls(text=transcription.text)
26
 
27
 
28
- class WordObject(BaseModel):
29
- start: float
30
- end: float
31
- word: str
32
- probability: float
33
-
34
- @classmethod
35
- def from_word(cls, word: Word) -> WordObject:
36
- return cls(
37
- start=word.start,
38
- end=word.end,
39
- word=word.word,
40
- probability=word.probability,
41
- )
42
-
43
-
44
- class SegmentObject(BaseModel):
45
- id: int
46
- seek: int
47
- start: float
48
- end: float
49
- text: str
50
- tokens: list[int]
51
- temperature: float
52
- avg_logprob: float
53
- compression_ratio: float
54
- no_speech_prob: float
55
-
56
- @classmethod
57
- def from_segment(cls, segment: Segment) -> SegmentObject:
58
- return cls(
59
- id=segment.id,
60
- seek=segment.seek,
61
- start=segment.start,
62
- end=segment.end,
63
- text=segment.text,
64
- tokens=segment.tokens,
65
- temperature=segment.temperature,
66
- avg_logprob=segment.avg_logprob,
67
- compression_ratio=segment.compression_ratio,
68
- no_speech_prob=segment.no_speech_prob,
69
- )
70
-
71
-
72
  # https://platform.openai.com/docs/api-reference/audio/verbose-json-object
73
  class TranscriptionVerboseJsonResponse(BaseModel):
74
  task: str = "transcribe"
75
  language: str
76
  duration: float
77
  text: str
78
- words: list[WordObject]
79
- segments: list[SegmentObject]
80
 
81
  @classmethod
82
  def from_segment(cls, segment: Segment, transcription_info: TranscriptionInfo) -> TranscriptionVerboseJsonResponse:
@@ -84,8 +38,8 @@ class TranscriptionVerboseJsonResponse(BaseModel):
84
  language=transcription_info.language,
85
  duration=segment.end - segment.start,
86
  text=segment.text,
87
- words=([WordObject.from_word(word) for word in segment.words] if isinstance(segment.words, list) else []),
88
- segments=[SegmentObject.from_segment(segment)],
89
  )
90
 
91
  @classmethod
@@ -95,9 +49,9 @@ class TranscriptionVerboseJsonResponse(BaseModel):
95
  return cls(
96
  language=transcription_info.language,
97
  duration=transcription_info.duration,
98
- text=utils.segments_text(segments),
99
- segments=[SegmentObject.from_segment(segment) for segment in segments],
100
- words=[WordObject.from_word(word) for word in utils.words_from_segments(segments)],
101
  )
102
 
103
  @classmethod
@@ -106,15 +60,7 @@ class TranscriptionVerboseJsonResponse(BaseModel):
106
  language="english", # FIX: hardcoded
107
  duration=transcription.duration,
108
  text=transcription.text,
109
- words=[
110
- WordObject(
111
- start=word.start,
112
- end=word.end,
113
- word=word.text,
114
- probability=word.probability,
115
- )
116
- for word in transcription.words
117
- ],
118
  segments=[], # FIX: hardcoded
119
  )
120
 
 
4
 
5
  from pydantic import BaseModel, ConfigDict, Field
6
 
7
+ from faster_whisper_server.core import Segment, Transcription, Word, segments_to_text
8
 
9
  if TYPE_CHECKING:
10
+ from faster_whisper.transcribe import TranscriptionInfo
 
 
11
 
12
 
13
  # https://platform.openai.com/docs/api-reference/audio/json-object
 
16
 
17
  @classmethod
18
  def from_segments(cls, segments: list[Segment]) -> TranscriptionJsonResponse:
19
+ return cls(text=segments_to_text(segments))
20
 
21
  @classmethod
22
  def from_transcription(cls, transcription: Transcription) -> TranscriptionJsonResponse:
23
  return cls(text=transcription.text)
24
 
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # https://platform.openai.com/docs/api-reference/audio/verbose-json-object
27
  class TranscriptionVerboseJsonResponse(BaseModel):
28
  task: str = "transcribe"
29
  language: str
30
  duration: float
31
  text: str
32
+ words: list[Word]
33
+ segments: list[Segment]
34
 
35
  @classmethod
36
  def from_segment(cls, segment: Segment, transcription_info: TranscriptionInfo) -> TranscriptionVerboseJsonResponse:
 
38
  language=transcription_info.language,
39
  duration=segment.end - segment.start,
40
  text=segment.text,
41
+ words=(segment.words if isinstance(segment.words, list) else []),
42
+ segments=[segment],
43
  )
44
 
45
  @classmethod
 
49
  return cls(
50
  language=transcription_info.language,
51
  duration=transcription_info.duration,
52
+ text=segments_to_text(segments),
53
+ segments=segments,
54
+ words=Word.from_segments(segments),
55
  )
56
 
57
  @classmethod
 
60
  language="english", # FIX: hardcoded
61
  duration=transcription.duration,
62
  text=transcription.text,
63
+ words=transcription.words,
 
 
 
 
 
 
 
 
64
  segments=[], # FIX: hardcoded
65
  )
66
 
faster_whisper_server/transcriber.py CHANGED
@@ -4,12 +4,7 @@ from typing import TYPE_CHECKING
4
 
5
  from faster_whisper_server.audio import Audio, AudioStream
6
  from faster_whisper_server.config import config
7
- from faster_whisper_server.core import (
8
- Transcription,
9
- Word,
10
- common_prefix,
11
- to_full_sentences,
12
- )
13
  from faster_whisper_server.logger import logger
14
 
15
  if TYPE_CHECKING:
@@ -37,30 +32,16 @@ class LocalAgreement:
37
 
38
  return prefix
39
 
40
- @classmethod
41
- def prompt(cls, confirmed: Transcription) -> str | None:
42
- sentences = to_full_sentences(confirmed.words)
43
- if len(sentences) == 0:
44
- return None
45
- return sentences[-1].text
46
-
47
- # TODO: better name
48
- @classmethod
49
- def needs_audio_after(cls, confirmed: Transcription) -> float:
50
- full_sentences = to_full_sentences(confirmed.words)
51
- return full_sentences[-1].end if len(full_sentences) > 0 else 0.0
52
-
53
 
 
54
  def needs_audio_after(confirmed: Transcription) -> float:
55
  full_sentences = to_full_sentences(confirmed.words)
56
- return full_sentences[-1].end if len(full_sentences) > 0 else 0.0
57
 
58
 
59
  def prompt(confirmed: Transcription) -> str | None:
60
  sentences = to_full_sentences(confirmed.words)
61
- if len(sentences) == 0:
62
- return None
63
- return sentences[-1].text
64
 
65
 
66
  async def audio_transcriber(
 
4
 
5
  from faster_whisper_server.audio import Audio, AudioStream
6
  from faster_whisper_server.config import config
7
+ from faster_whisper_server.core import Transcription, Word, common_prefix, to_full_sentences, word_to_text
 
 
 
 
 
8
  from faster_whisper_server.logger import logger
9
 
10
  if TYPE_CHECKING:
 
32
 
33
  return prefix
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ # TODO: needs a better name
37
  def needs_audio_after(confirmed: Transcription) -> float:
38
  full_sentences = to_full_sentences(confirmed.words)
39
+ return full_sentences[-1][-1].end if len(full_sentences) > 0 else 0.0
40
 
41
 
42
  def prompt(confirmed: Transcription) -> str | None:
43
  sentences = to_full_sentences(confirmed.words)
44
+ return word_to_text(sentences[-1]) if len(sentences) > 0 else None
 
 
45
 
46
 
47
  async def audio_transcriber(
faster_whisper_server/utils.py DELETED
@@ -1,14 +0,0 @@
1
- from faster_whisper.transcribe import Segment, Word
2
-
3
-
4
- def segments_text(segments: list[Segment]) -> str:
5
- return "".join(segment.text for segment in segments).strip()
6
-
7
-
8
- def words_from_segments(segments: list[Segment]) -> list[Word]:
9
- words = []
10
- for segment in segments:
11
- if segment.words is None:
12
- continue
13
- words.extend(segment.words)
14
- return words