from __future__ import annotations

from collections import OrderedDict
import gc
import logging
import time
from typing import TYPE_CHECKING

from faster_whisper import WhisperModel

if TYPE_CHECKING:
    from faster_whisper_server.config import (
        Config,
    )

logger = logging.getLogger(__name__)


class ModelManager:
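    """Cache of loaded ``WhisperModel`` instances, keyed by model name.

    At most ``config.max_models`` models are kept loaded; when the limit is
    reached, the first-loaded model (insertion order) is evicted to make room.
    """
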
    def __init__(self, config: Config) -> None:
        self.config = config
        self.loaded_models: OrderedDict[str, WhisperModel] = OrderedDict()

    def load_model(self, model_name: str) -> WhisperModel:
        if model_name in self.loaded_models:
            logger.debug(f"{model_name} model already loaded")
            return self.loaded_models[model_name]
        if len(self.loaded_models) >= self.config.max_models:
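            # Evict in insertion order: the first-loaded model is dropped first.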
            oldest_model_name = next(iter(self.loaded_models))
            logger.info(
                f"Max models ({self.config.max_models}) reached. Unloading the oldest model: {oldest_model_name}"
            )
            del self.loaded_models[oldest_model_name]
            gc.collect()  # encourage prompt release of the evicted model's memory
        logger.debug(f"Loading {model_name}...")
        start = time.perf_counter()
        # NOTE: will raise an exception if the model name isn't valid. Should I do an explicit check?
        whisper = WhisperModel(
            model_name,
            device=self.config.whisper.inference_device,
            device_index=self.config.whisper.device_index,
            compute_type=self.config.whisper.compute_type,
            cpu_threads=self.config.whisper.cpu_threads,
            num_workers=self.config.whisper.num_workers,
        )
        logger.info(
            f"Loaded {model_name} in {time.perf_counter() - start:.2f} seconds. {self.config.whisper.inference_device}({self.config.whisper.compute_type}) will be used for inference."  # noqa: E501
        )
        self.loaded_models[model_name] = whisper
        return whisper
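

# A minimal usage sketch (hypothetical; not part of the module). It assumes
# `Config` can be constructed with defaults, and the model name is just one
# example of a faster-whisper-compatible checkpoint:
#
#     from faster_whisper_server.config import Config
#
#     manager = ModelManager(Config())
#     model = manager.load_model("Systran/faster-whisper-small")
#     segments, info = model.transcribe("audio.wav")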