Fedir Zadniprovskyi committed on
Commit
2afe55d
·
1 Parent(s): 14812fd

feat: add language field to the model response type

Browse files
faster_whisper_server/main.py CHANGED
@@ -87,18 +87,27 @@ def health() -> Response:
87
 
88
  @app.get("/v1/models")
89
  def get_models() -> ModelListResponse:
90
- models = huggingface_hub.list_models(library="ctranslate2", tags="automatic-speech-recognition")
91
- models = [
92
- ModelObject(
 
 
 
 
 
 
 
 
 
 
93
  id=model.id,
94
  created=int(model.created_at.timestamp()),
95
  object_="model",
96
  owned_by=model.id.split("/")[0],
 
97
  )
98
- for model in models
99
- if model.created_at is not None
100
- ]
101
- return ModelListResponse(data=models)
102
 
103
 
104
  @app.get("/v1/models/{model_name:path}")
@@ -107,7 +116,9 @@ def get_model(
107
  model_name: Annotated[str, Path(example="Systran/faster-distil-whisper-large-v3")],
108
  ) -> ModelObject:
109
  models = list(
110
- huggingface_hub.list_models(model_name=model_name, library="ctranslate2", tags="automatic-speech-recognition")
 
 
111
  )
112
  if len(models) == 0:
113
  raise HTTPException(status_code=404, detail="Model doesn't exists")
@@ -122,11 +133,20 @@ def get_model(
122
  detail=f"Model doesn't exists. Possible matches: {", ".join([model.id for model in models])}",
123
  )
124
  assert exact_match.created_at is not None
 
 
 
 
 
 
 
 
125
  return ModelObject(
126
  id=exact_match.id,
127
  created=int(exact_match.created_at.timestamp()),
128
  object_="model",
129
  owned_by=exact_match.id.split("/")[0],
 
130
  )
131
 
132
 
 
87
 
88
  @app.get("/v1/models")
89
  def get_models() -> ModelListResponse:
90
+ models = huggingface_hub.list_models(library="ctranslate2", tags="automatic-speech-recognition", cardData=True)
91
+ transformed_models: list[ModelObject] = []
92
+ for model in models:
93
+ assert model.created_at is not None
94
+ assert model.card_data is not None
95
+ assert model.card_data.language is None or isinstance(model.card_data.language, str | list)
96
+ if model.card_data.language is None:
97
+ language = []
98
+ elif isinstance(model.card_data.language, str):
99
+ language = [model.card_data.language]
100
+ else:
101
+ language = model.card_data.language
102
+ transformed_model = ModelObject(
103
  id=model.id,
104
  created=int(model.created_at.timestamp()),
105
  object_="model",
106
  owned_by=model.id.split("/")[0],
107
+ language=language,
108
  )
109
+ transformed_models.append(transformed_model)
110
+ return ModelListResponse(data=transformed_models)
 
 
111
 
112
 
113
  @app.get("/v1/models/{model_name:path}")
 
116
  model_name: Annotated[str, Path(example="Systran/faster-distil-whisper-large-v3")],
117
  ) -> ModelObject:
118
  models = list(
119
+ huggingface_hub.list_models(
120
+ model_name=model_name, library="ctranslate2", tags="automatic-speech-recognition", cardData=True
121
+ )
122
  )
123
  if len(models) == 0:
124
  raise HTTPException(status_code=404, detail="Model doesn't exists")
 
133
  detail=f"Model doesn't exists. Possible matches: {", ".join([model.id for model in models])}",
134
  )
135
  assert exact_match.created_at is not None
136
+ assert exact_match.card_data is not None
137
+ assert exact_match.card_data.language is None or isinstance(exact_match.card_data.language, str | list)
138
+ if exact_match.card_data.language is None:
139
+ language = []
140
+ elif isinstance(exact_match.card_data.language, str):
141
+ language = [exact_match.card_data.language]
142
+ else:
143
+ language = exact_match.card_data.language
144
  return ModelObject(
145
  id=exact_match.id,
146
  created=int(exact_match.created_at.timestamp()),
147
  object_="model",
148
  owned_by=exact_match.id.split("/")[0],
149
+ language=language,
150
  )
151
 
152
 
faster_whisper_server/server_models.py CHANGED
@@ -133,6 +133,8 @@ class ModelObject(BaseModel):
133
  """The object type, which is always "model"."""
134
  owned_by: str
135
  """The organization that owns the model."""
 
 
136
 
137
  model_config = ConfigDict(
138
  populate_by_name=True,
 
133
  """The object type, which is always "model"."""
134
  owned_by: str
135
  """The organization that owns the model."""
136
+ language: list[str] = Field(default_factory=list)
137
+ """List of ISO 639-3 language codes supported by the model. It's possible that the list will be empty. This field is not a part of the OpenAI API spec and is added for convenience.""" # noqa: E501
138
 
139
  model_config = ConfigDict(
140
  populate_by_name=True,