Fedir Zadniprovskyi committed on
Commit
d2d8fab
·
1 Parent(s): 2c38ce0

chore: add `hf_utils` module

Browse files
src/faster_whisper_server/hf_utils.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import Generator
2
+ from pathlib import Path
3
+ import typing
4
+
5
+ import huggingface_hub
6
+
7
+ from faster_whisper_server.logger import logger
8
+
9
+ LIBRARY_NAME = "ctranslate2"
10
+ TASK_NAME = "automatic-speech-recognition"
11
+
12
+
13
def does_local_model_exist(model_id: str) -> bool:
    """Return whether `model_id` equals the repo id of any model yielded by `list_local_models`."""
    for cached_repo, _card_data in list_local_models():
        if model_id == cached_repo.repo_id:
            return True
    return False
15
+
16
+
17
def list_local_models() -> Generator[tuple[huggingface_hub.CachedRepoInfo, huggingface_hub.ModelCardData], None, None]:
    """Yield the locally cached Hugging Face models whose model card matches this server's criteria.

    Scans the Hugging Face cache directory and, for every cached repository of type "model",
    loads the model card from its `README.md`. A model is yielded only when the card's
    `library_name` equals `LIBRARY_NAME` and `TASK_NAME` is present in the card's tags.

    If the inspected revision has no cached `README.md`, the file is fetched with
    `huggingface_hub.hf_hub_download` before the card is loaded.

    Yields:
        Tuples of `(cached repo info, model card data)` for each matching model.
    """
    hf_cache = huggingface_hub.scan_cache_dir()
    for model in hf_cache.repos:
        if model.repo_type != "model":
            continue

        # A bare `next(iter(...))` raises StopIteration when a repo has no cached revisions; inside a
        # generator that surfaces as RuntimeError (PEP 479) and aborts the whole listing. Skip instead.
        revision = next(iter(model.revisions), None)
        if revision is None:
            continue

        cached_readme_file = next((f for f in revision.files if f.file_name == "README.md"), None)
        if cached_readme_file is not None:
            readme_file_path = Path(cached_readme_file.file_path)
        else:
            # NOTE: the README.md doesn't get downloaded when `WhisperModel` is called
            logger.debug(f"Model {model.repo_id} does not have a README.md file. Downloading it.")
            readme_file_path = Path(huggingface_hub.hf_hub_download(model.repo_id, "README.md"))

        model_card = huggingface_hub.ModelCard.load(readme_file_path)
        model_card_data = typing.cast(huggingface_hub.ModelCardData, model_card.data)
        if (
            model_card_data.library_name == LIBRARY_NAME
            and model_card_data.tags is not None
            and TASK_NAME in model_card_data.tags
        ):
            yield model, model_card_data