Spaces:
Configuration error
Configuration error
File size: 2,989 Bytes
4b9d55e a5d79bf 4b9d55e a5d79bf 4b9d55e a5d79bf 4b9d55e a5d79bf 4b9d55e a5d79bf 4b9d55e a5d79bf 4b9d55e a5d79bf 4b9d55e a5d79bf 4b9d55e a5d79bf 4b9d55e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
## Docker Compose (Recommended)
TODO: just reference the existing compose file in the repo
=== "CUDA"
```yaml
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
services:
  faster-whisper-server:
    image: fedirz/faster-whisper-server:latest-cuda
    # `container_name` is the valid Compose key for naming the container;
    # `name` is not recognized under a service definition.
    container_name: faster-whisper-server
    restart: unless-stopped
    ports:
      # quoted to avoid YAML implicit-typing surprises with colon-separated values
      - "8000:8000"
    volumes:
      # persist downloaded models across container restarts
      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: ["gpu"]
volumes:
  hf-hub-cache:
```
=== "CUDA (with CDI feature enabled)"
```yaml
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
services:
  faster-whisper-server:
    image: fedirz/faster-whisper-server:latest-cuda
    # `container_name` is the valid Compose key for naming the container;
    # `name` is not recognized under a service definition.
    container_name: faster-whisper-server
    restart: unless-stopped
    ports:
      # quoted to avoid YAML implicit-typing surprises with colon-separated values
      - "8000:8000"
    volumes:
      # persist downloaded models across container restarts
      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
    deploy:
      resources:
        reservations:
          # https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
          # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
          devices:
            - driver: cdi
              device_ids:
                - nvidia.com/gpu=all
volumes:
  hf-hub-cache:
```
=== "CPU"
```yaml
services:
  faster-whisper-server:
    image: fedirz/faster-whisper-server:latest-cpu
    # `container_name` is the valid Compose key for naming the container;
    # `name` is not recognized under a service definition.
    container_name: faster-whisper-server
    restart: unless-stopped
    ports:
      # quoted to avoid YAML implicit-typing surprises with colon-separated values
      - "8000:8000"
    volumes:
      # persist downloaded models across container restarts
      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
volumes:
  hf-hub-cache:
```
## Docker
=== "CUDA"
```bash
# Run the CUDA image detached on port 8000, with all GPUs attached and a named volume caching Hugging Face model downloads.
docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --gpus=all fedirz/faster-whisper-server:latest-cuda
```
=== "CUDA (with CDI feature enabled)"
```bash
# Same as the CUDA variant, but requests GPUs through the CDI device interface instead of the --gpus flag.
docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --device=nvidia.com/gpu=all fedirz/faster-whisper-server:latest-cuda
```
=== "CPU"
```bash
# Run the CPU-only image detached on port 8000, with a named volume caching Hugging Face model downloads.
docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub fedirz/faster-whisper-server:latest-cpu
```
## Kubernetes
WARNING: this section was written a few months ago and may be outdated.
Please refer to this [blog post](https://substratus.ai/blog/deploying-faster-whisper-on-k8s)
## Python (requires Python 3.12+)
```bash
git clone https://github.com/fedirz/faster-whisper-server.git
cd faster-whisper-server
# create a virtual environment with uv
uv venv
# fixed typo: was "sourve", which fails with "command not found"
source .venv/bin/activate
uv sync --all-extras
# --factory: main exposes an app factory rather than an app instance
uvicorn --factory --host 0.0.0.0 faster_whisper_server.main:create_app
```
|