Fedir Zadniprovskyi committed on
Commit
d075459
·
1 Parent(s): b995db6

chore: adjust live transcription defaults and add descriptions

Browse files
Files changed (1) hide show
  1. faster_whisper_server/config.py +13 -3
faster_whisper_server/config.py CHANGED
@@ -184,20 +184,30 @@ class Config(BaseSettings):
184
  """
185
 
186
  default_language: Language | None = None
 
 
 
 
187
  default_response_format: ResponseFormat = ResponseFormat.JSON
188
  whisper: WhisperConfig = WhisperConfig()
189
  max_models: int = 1
 
 
 
190
  max_no_data_seconds: float = 1.0
191
  """
192
- Max duration to for the next audio chunk before transcription is finilized and connection is closed.
193
  """
194
  min_duration: float = 1.0
 
 
 
195
  word_timestamp_error_margin: float = 0.2
196
- max_inactivity_seconds: float = 5.0
197
  """
198
  Max allowed audio duration without any speech being detected before transcription is finalized and the connection is closed.
199
  """ # noqa: E501
200
- inactivity_window_seconds: float = 10.0
201
  """
202
  Controls how many latest seconds of audio are being passed through VAD.
203
  Should be greater than `max_inactivity_seconds`
 
184
  """
185
 
186
  default_language: Language | None = None
187
+ """
188
+ Default language to use for transcription. If not set, the language will be detected automatically.
189
+ It is recommended to set this as it will improve the performance.
190
+ """
191
  default_response_format: ResponseFormat = ResponseFormat.JSON
192
  whisper: WhisperConfig = WhisperConfig()
193
  max_models: int = 1
194
+ """
195
+ Maximum number of models that can be loaded at a time.
196
+ """
197
  max_no_data_seconds: float = 1.0
198
  """
199
+ Max duration to wait for the next audio chunk before transcription is finalized and the connection is closed.
200
  """
201
  min_duration: float = 1.0
202
+ """
203
+ Minimum duration of an audio chunk that will be transcribed.
204
+ """
205
  word_timestamp_error_margin: float = 0.2
206
+ max_inactivity_seconds: float = 2.5
207
  """
208
  Max allowed audio duration without any speech being detected before transcription is finalized and the connection is closed.
209
  """ # noqa: E501
210
+ inactivity_window_seconds: float = 5.0
211
  """
212
  Controls how many latest seconds of audio are being passed through VAD.
213
  Should be greater than `max_inactivity_seconds`