Fedir Zadniprovskyi committed on
Commit
47627a9
·
1 Parent(s): af41874

chore: misc

Browse files
.github/workflows/docker-build-and-push.yaml CHANGED
@@ -16,9 +16,9 @@ jobs:
16
  dockerfile: [Dockerfile.cuda, Dockerfile.cpu]
17
  include:
18
  - dockerfile: Dockerfile.cuda
19
- tag-prefix: cuda-
20
  - dockerfile: Dockerfile.cpu
21
- tag-prefix: cpu-
22
  steps:
23
  - uses: actions/checkout@v4
24
  - name: Login to Docker Hub
@@ -33,7 +33,8 @@ jobs:
33
  fedirz/faster-whisper-server
34
  # https://github.com/docker/metadata-action?tab=readme-ov-file#flavor-input
35
  flavor: |
36
- prefix=${{ matrix.tag-prefix }}
 
37
  tags: |
38
  type=semver,pattern={{version}}
39
  type=semver,pattern={{major}}.{{minor}}
 
16
  dockerfile: [Dockerfile.cuda, Dockerfile.cpu]
17
  include:
18
  - dockerfile: Dockerfile.cuda
19
+ tag-suffix: -cuda
20
  - dockerfile: Dockerfile.cpu
21
+ tag-suffix: -cpu
22
  steps:
23
  - uses: actions/checkout@v4
24
  - name: Login to Docker Hub
 
33
  fedirz/faster-whisper-server
34
  # https://github.com/docker/metadata-action?tab=readme-ov-file#flavor-input
35
  flavor: |
36
+ latest=false
37
+ suffix=${{ matrix.tag-suffix}}
38
  tags: |
39
  type=semver,pattern={{version}}
40
  type=semver,pattern={{major}}.{{minor}}
Dockerfile.cpu CHANGED
@@ -15,7 +15,7 @@ RUN poetry install --only main
15
  COPY ./faster_whisper_server ./faster_whisper_server
16
  ENTRYPOINT ["poetry", "run"]
17
  CMD ["uvicorn", "faster_whisper_server.main:app"]
18
- ENV WHISPER_MODEL=distil-medium.en
19
  ENV WHISPER_INFERENCE_DEVICE=cpu
20
  ENV WHISPER_COMPUTE_TYPE=int8
21
  ENV UVICORN_HOST=0.0.0.0
 
15
  COPY ./faster_whisper_server ./faster_whisper_server
16
  ENTRYPOINT ["poetry", "run"]
17
  CMD ["uvicorn", "faster_whisper_server.main:app"]
18
+ ENV WHISPER_MODEL=medium.en
19
  ENV WHISPER_INFERENCE_DEVICE=cpu
20
  ENV WHISPER_COMPUTE_TYPE=int8
21
  ENV UVICORN_HOST=0.0.0.0
README.md CHANGED
@@ -60,10 +60,10 @@ print(transcript.text)
60
  # If `model` isn't specified, the default model is used
61
  curl http://localhost:8000/v1/audio/transcriptions -F "[email protected]"
62
  curl http://localhost:8000/v1/audio/transcriptions -F "[email protected]"
63
- curl http://localhost:8000/v1/audio/transcriptions -F "[email protected]" -F "streaming=true"
64
- curl http://localhost:8000/v1/audio/transcriptions -F "[email protected]" -F "streaming=true" -F "model=distil-large-v3"
65
  # It's recommended that you always specify the language as that will reduce the transcription time
66
- curl http://localhost:8000/v1/audio/transcriptions -F "[email protected]" -F "streaming=true" -F "model=distil-large-v3" -F "language=en"
67
 
68
  curl http://localhost:8000/v1/audio/translations -F "[email protected]"
69
  ```
 
60
  # If `model` isn't specified, the default model is used
61
  curl http://localhost:8000/v1/audio/transcriptions -F "[email protected]"
62
  curl http://localhost:8000/v1/audio/transcriptions -F "[email protected]"
63
+ curl http://localhost:8000/v1/audio/transcriptions -F "[email protected]" -F "stream=true"
64
+ curl http://localhost:8000/v1/audio/transcriptions -F "[email protected]" -F "stream=true" -F "model=distil-large-v3"
65
  # It's recommended that you always specify the language as that will reduce the transcription time
66
+ curl http://localhost:8000/v1/audio/transcriptions -F "[email protected]" -F "stream=true" -F "model=distil-large-v3" -F "language=en"
67
 
68
  curl http://localhost:8000/v1/audio/translations -F "[email protected]"
69
  ```
faster_whisper_server/config.py CHANGED
@@ -163,7 +163,7 @@ class Language(enum.StrEnum):
163
 
164
 
165
  class WhisperConfig(BaseModel):
166
- model: Model = Field(default=Model.DISTIL_MEDIUM_EN)
167
  inference_device: Device = Field(default=Device.AUTO)
168
  compute_type: Quantization = Field(default=Quantization.DEFAULT)
169
 
 
163
 
164
 
165
  class WhisperConfig(BaseModel):
166
+ model: Model = Field(default=Model.MEDIUM_EN)
167
  inference_device: Device = Field(default=Device.AUTO)
168
  compute_type: Quantization = Field(default=Quantization.DEFAULT)
169
 
faster_whisper_server/main.py CHANGED
@@ -58,7 +58,7 @@ def load_model(model_name: Model) -> WhisperModel:
58
  compute_type=config.whisper.compute_type,
59
  )
60
  logger.info(
61
- f"Loaded {model_name} loaded in {time.perf_counter() - start:.2f} seconds"
62
  )
63
  models[model_name] = whisper
64
  return whisper
 
58
  compute_type=config.whisper.compute_type,
59
  )
60
  logger.info(
61
+ f"Loaded {model_name} loaded in {time.perf_counter() - start:.2f} seconds. {config.whisper.inference_device}({config.whisper.compute_type}) will be used for inference."
62
  )
63
  models[model_name] = whisper
64
  return whisper