File size: 4,878 Bytes
35eafc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196f56a
35eafc3
 
 
 
 
 
 
 
 
 
 
 
 
 
196f56a
35eafc3
 
 
 
 
 
 
 
 
 
 
 
196f56a
35eafc3
 
 
196f56a
35eafc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196f56a
35eafc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import asyncio
import os

import anyio
from httpx import ASGITransport, AsyncClient
import pytest

from faster_whisper_server.main import create_app


@pytest.mark.asyncio
async def test_model_unloaded_after_ttl() -> None:
    """Check that an idle model is no longer listed once its TTL has elapsed.

    The model is loaded explicitly through ``POST /api/ps/{model}``, confirmed to
    be listed by ``GET /api/ps``, and expected to be gone again after sleeping
    for ``ttl + 1`` seconds without touching it.
    """
    ttl = 5
    model = "Systran/faster-whisper-tiny.en"
    # Scope the environment overrides to this test: they are undone when the
    # block exits (also on failure) instead of leaking into later tests.
    with pytest.MonkeyPatch.context() as env:
        env.setenv("WHISPER__TTL", str(ttl))
        env.setenv("ENABLE_UI", "false")
        # The app is created only after the overrides are in place.
        transport = ASGITransport(app=create_app())
        async with AsyncClient(transport=transport, base_url="http://test") as aclient:
            res = (await aclient.get("/api/ps")).json()
            assert len(res["models"]) == 0
            await aclient.post(f"/api/ps/{model}")
            res = (await aclient.get("/api/ps")).json()
            assert len(res["models"]) == 1
            await asyncio.sleep(ttl + 1)  # wait for the model to be unloaded
            res = (await aclient.get("/api/ps")).json()
            assert len(res["models"]) == 0


@pytest.mark.asyncio
async def test_ttl_resets_after_usage() -> None:
    """Check that using a model restarts its TTL countdown.

    Timeline (``ttl`` is 5):

    1. Load the model and idle for ``ttl - 2`` seconds; it must still be listed.
    2. Run a transcription, then idle for another ``ttl - 2`` seconds. More than
       ``ttl`` seconds have now passed since the initial load, so the model
       still being listed shows the transcription restarted the countdown.
    3. Idle for 3 more seconds (past ``ttl`` since the transcription); the model
       must be gone.
    4. Run another transcription to confirm the model can be loaded again.
    """
    ttl = 5
    model = "Systran/faster-whisper-tiny.en"
    # Scope the environment overrides to this test: they are undone when the
    # block exits (also on failure) instead of leaking into later tests.
    with pytest.MonkeyPatch.context() as env:
        env.setenv("WHISPER__TTL", str(ttl))
        env.setenv("ENABLE_UI", "false")
        # The app is created only after the overrides are in place.
        transport = ASGITransport(app=create_app())
        async with AsyncClient(transport=transport, base_url="http://test") as aclient:
            await aclient.post(f"/api/ps/{model}")
            res = (await aclient.get("/api/ps")).json()
            assert len(res["models"]) == 1
            await asyncio.sleep(ttl - 2)  # sleep for less than the ttl. The model should not be unloaded
            res = (await aclient.get("/api/ps")).json()
            assert len(res["models"]) == 1

            async with await anyio.open_file("audio.wav", "rb") as f:
                data = await f.read()
            transcription = await aclient.post(
                "/v1/audio/transcriptions",
                files={"file": ("audio.wav", data, "audio/wav")},
                data={"model": model},
            )
            # Fail here, with a clear HTTP error, rather than later with a
            # confusing model-count mismatch if the request was rejected.
            transcription.raise_for_status()
            res = (await aclient.get("/api/ps")).json()
            assert len(res["models"]) == 1
            await asyncio.sleep(ttl - 2)  # sleep for less than the ttl. The model should not be unloaded
            res = (await aclient.get("/api/ps")).json()
            assert len(res["models"]) == 1

            await asyncio.sleep(3)  # sleep for a bit more. The model should be unloaded
            res = (await aclient.get("/api/ps")).json()
            assert len(res["models"]) == 0

            # test the model can be used again after being unloaded
            # this just ensures the model can be loaded again after being unloaded
            transcription = await aclient.post(
                "/v1/audio/transcriptions",
                files={"file": ("audio.wav", data, "audio/wav")},
                data={"model": model},
            )
            # Without this check an error response would go unnoticed and the
            # reload would not actually be verified.
            transcription.raise_for_status()


@pytest.mark.asyncio
async def test_model_cant_be_unloaded_when_used() -> None:
    """Check that unloading a model while a request is using it is refused with 409.

    A transcription request is started in the background and, shortly after, a
    ``DELETE /api/ps/{model}`` is issued and expected to return HTTP 409. Once
    the transcription has finished, ``GET /api/ps`` must list no models (the
    TTL is 0 in this test).
    """
    ttl = 0
    model = "Systran/faster-whisper-tiny.en"
    # Scope the environment overrides to this test: they are undone when the
    # block exits (also on failure) instead of leaking into later tests.
    with pytest.MonkeyPatch.context() as env:
        env.setenv("WHISPER__TTL", str(ttl))
        env.setenv("ENABLE_UI", "false")
        # The app is created only after the overrides are in place.
        transport = ASGITransport(app=create_app())
        async with AsyncClient(transport=transport, base_url="http://test") as aclient:
            async with await anyio.open_file("audio.wav", "rb") as f:
                data = await f.read()

            # Run the transcription concurrently so the DELETE below is issued
            # while the model is still in use.
            task = asyncio.create_task(
                aclient.post(
                    "/v1/audio/transcriptions",
                    files={"file": ("audio.wav", data, "audio/wav")},
                    data={"model": model},
                )
            )
            # NOTE(review): assumes 0.1s is long enough for the request to have
            # started and short enough that it has not finished yet - this is
            # timing dependent.
            await asyncio.sleep(0.1)  # wait for the server to start processing the request
            res = await aclient.delete(f"/api/ps/{model}")
            assert res.status_code == 409

            await task
            res = (await aclient.get("/api/ps")).json()
            assert len(res["models"]) == 0


@pytest.mark.asyncio
async def test_model_cant_be_loaded_twice() -> None:
    """Check that loading an already loaded model is rejected.

    The first ``POST /api/ps/{model}`` must return HTTP 201, the second one
    HTTP 409, and ``GET /api/ps`` must still list exactly one model afterwards.
    """
    # NOTE(review): -1 presumably disables TTL-based unloading so the model
    # stays loaded between the two POSTs - confirm against the config docs.
    ttl = -1
    model = "Systran/faster-whisper-tiny.en"
    # Scope the environment overrides to this test: they are undone when the
    # block exits (also on failure) instead of leaking into later tests.
    with pytest.MonkeyPatch.context() as env:
        env.setenv("ENABLE_UI", "false")
        env.setenv("WHISPER__TTL", str(ttl))
        # The app is created only after the overrides are in place.
        transport = ASGITransport(app=create_app())
        async with AsyncClient(transport=transport, base_url="http://test") as aclient:
            res = await aclient.post(f"/api/ps/{model}")
            assert res.status_code == 201
            res = await aclient.post(f"/api/ps/{model}")
            assert res.status_code == 409
            res = (await aclient.get("/api/ps")).json()
            assert len(res["models"]) == 1


@pytest.mark.asyncio
async def test_model_is_unloaded_after_request_when_ttl_is_zero() -> None:
    """Check that with a TTL of 0 no model stays loaded after a request.

    A single transcription is performed and ``GET /api/ps`` is then expected to
    list no models.
    """
    ttl = 0
    model = "Systran/faster-whisper-tiny.en"
    # Scope the environment overrides to this test: they are undone when the
    # block exits (also on failure) instead of leaking into later tests.
    with pytest.MonkeyPatch.context() as env:
        env.setenv("WHISPER__MODEL", model)
        env.setenv("WHISPER__TTL", str(ttl))
        env.setenv("ENABLE_UI", "false")
        # The app is created only after the overrides are in place.
        transport = ASGITransport(app=create_app())
        async with AsyncClient(transport=transport, base_url="http://test") as aclient:
            async with await anyio.open_file("audio.wav", "rb") as f:
                data = await f.read()
            transcription = await aclient.post(
                "/v1/audio/transcriptions",
                files={"file": ("audio.wav", data, "audio/wav")},
                data={"model": model},
            )
            # A rejected request would also leave zero models loaded, so make
            # sure the transcription actually succeeded before checking.
            transcription.raise_for_status()
            res = (await aclient.get("/api/ps")).json()
            assert len(res["models"]) == 0