Spaces:
Configuration error
Configuration error
Fedir Zadniprovskyi
committed on
Commit
·
d2d8fab
1
Parent(s):
2c38ce0
chore: add `hf_utils` module
Browse files
src/faster_whisper_server/hf_utils.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from collections.abc import Generator
|
2 |
+
from pathlib import Path
|
3 |
+
import typing
|
4 |
+
|
5 |
+
import huggingface_hub
|
6 |
+
|
7 |
+
from faster_whisper_server.logger import logger
|
8 |
+
|
9 |
+
# Value a model card's `library_name` must equal for `list_local_models` to yield the repo.
LIBRARY_NAME = "ctranslate2"
|
10 |
+
# Tag that must be present in a model card's `tags` for `list_local_models` to yield the repo.
TASK_NAME = "automatic-speech-recognition"
|
11 |
+
|
12 |
+
|
13 |
+
def does_local_model_exist(model_id: str) -> bool:
    """Return whether `list_local_models` yields a repo whose `repo_id` equals `model_id`.

    Iteration stops at the first match, so later cached repos are not inspected.
    """
    for cached_repo, _card_data in list_local_models():
        if cached_repo.repo_id == model_id:
            return True
    return False
|
15 |
+
|
16 |
+
|
17 |
+
def list_local_models() -> Generator[tuple[huggingface_hub.CachedRepoInfo, huggingface_hub.ModelCardData], None, None]:
    """Yield cached Hugging Face model repos whose model card matches `LIBRARY_NAME` and `TASK_NAME`.

    Scans the Hugging Face cache and, for each cached repo of type ``"model"``,
    loads the ``README.md`` model card from one of its cached revisions. A repo
    is yielded only when the card's ``library_name`` equals ``LIBRARY_NAME`` and
    its ``tags`` are set and contain ``TASK_NAME``.

    Yields:
        ``(cached_repo, model_card_data)`` for every matching repo.

    Note:
        When the inspected revision has no cached ``README.md``, the file is
        obtained through ``huggingface_hub.hf_hub_download``. Errors raised by
        that call are not caught here and propagate to the caller.
    """
    hf_cache = huggingface_hub.scan_cache_dir()
    for model in hf_cache.repos:
        if model.repo_type != "model":
            continue

        # A bare `next(iter(...))` raises StopIteration on an empty revision set; inside a
        # generator that surfaces as RuntimeError (PEP 479) and would abort the whole listing.
        revision = next(iter(model.revisions), None)
        if revision is None:
            logger.debug(f"Model {model.repo_id} has no cached revisions. Skipping it.")
            continue

        cached_readme_file = next((f for f in revision.files if f.file_name == "README.md"), None)
        if cached_readme_file:
            readme_file_path = Path(cached_readme_file.file_path)
        else:
            # NOTE: the README.md doesn't get downloaded when `WhisperModel` is called
            logger.debug(f"Model {model.repo_id} does not have a README.md file. Downloading it.")
            readme_file_path = Path(huggingface_hub.hf_hub_download(model.repo_id, "README.md"))

        model_card = huggingface_hub.ModelCard.load(readme_file_path)
        # `model_card.data` is narrowed for the type checker only; no runtime conversion happens.
        model_card_data = typing.cast(huggingface_hub.ModelCardData, model_card.data)
        if (
            model_card_data.library_name == LIBRARY_NAME
            and model_card_data.tags is not None
            and TASK_NAME in model_card_data.tags
        ):
            yield model, model_card_data
|