File size: 3,018 Bytes
8453b3d
 
 
 
 
 
 
 
 
 
 
 
 
84a6b12
 
a2bf507
 
b3ffc71
8453b3d
b3ffc71
a2bf507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84a6b12
 
 
 
a2bf507
 
 
 
b3ffc71
a2bf507
 
 
 
 
8453b3d
 
 
 
a2bf507
b3ffc71
 
 
 
 
875278a
 
a2bf507
 
 
 
 
8453b3d
 
 
 
 
 
 
 
 
 
 
 
84a6b12
 
a2bf507
b3ffc71
 
a2bf507
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from dataclasses import dataclass, fields
import gradio as gr


@dataclass
class WhisperGradioComponents:
    """
    A data class to pass Gradio components to the function before Gradio pre-processing.
    See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components

    Attributes
    ----------
    model_size: gr.Dropdown
        Whisper model size.
    lang: gr.Dropdown
        Source language of the file to transcribe.
    is_translate: gr.Checkbox
        Boolean value that determines whether to translate to English.
        It's Whisper's feature to translate speech from another language directly into English end-to-end.
    beam_size: gr.Number
        Int value that is used for decoding option.
    log_prob_threshold: gr.Number
        If the average log probability over sampled tokens is below this value, treat as failed.
    no_speech_threshold: gr.Number
        If the no_speech probability is higher than this value AND
        the average log probability over sampled tokens is below `log_prob_threshold`,
        consider the segment as silent.
    compute_type: gr.Dropdown
        Compute type for transcription.
        See more info: https://opennmt.net/CTranslate2/quantization.html
    best_of: gr.Number
        Number of candidates when sampling with non-zero temperature.
    patience: gr.Number
        Beam search patience factor.
    """
    # NOTE: the docstring must be the first statement in the class body;
    # placed after the fields it is a discarded expression and never
    # becomes ``__doc__``.
    model_size: gr.Dropdown
    lang: gr.Dropdown
    is_translate: gr.Checkbox
    beam_size: gr.Number
    log_prob_threshold: gr.Number
    no_speech_threshold: gr.Number
    compute_type: gr.Dropdown
    best_of: gr.Number
    patience: gr.Number

    def to_list(self) -> list:
        """
        Converts the data class attributes into a list, to pass parameters to a function before Gradio pre-processing.

        The list follows the field declaration order of this class. `to_values`
        unpacks its arguments positionally into `WhisperValues`, so the field
        order of the two classes has to stay in sync.

        Returns
        -------
        A list of Gradio components
        """
        return [getattr(self, f.name) for f in fields(self)]

    @staticmethod
    def to_values(*params) -> "WhisperValues":
        """
        Convert a tuple of parameters into a WhisperValues data class, to use parameters in a function after Gradio pre-processing.

        Parameters
        ----------
        *params: tuple
            This is provided in a tuple because Gradio does not support **kwargs arbitrary.
            Reference : https://discuss.huggingface.co/t/passing-an-additional-argument-to-a-function/25140/2
            Values are matched to `WhisperValues` fields by position.

        Returns
        -------
        A WhisperValues data class

        Raises
        ------
        TypeError
            If more positional values are given than `WhisperValues` has fields,
            or fewer than its required fields.
        """
        return WhisperValues(*params)


@dataclass
class WhisperValues:
    """
    A data class to use Whisper parameters in the function after Gradio pre-processing.
    See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components

    Attributes
    ----------
    model_size: str
        Whisper model size.
    lang: str
        Source language of the file to transcribe.
    is_translate: bool
        Boolean value that determines whether to translate to English.
    beam_size: int
        Int value that is used for decoding option.
    log_prob_threshold: float
        If the average log probability over sampled tokens is below this value, treat as failed.
    no_speech_threshold: float
        If the no_speech probability is higher than this value AND
        the average log probability over sampled tokens is below `log_prob_threshold`,
        consider the segment as silent.
    compute_type: str
        Compute type for transcription.
    best_of: int
        Number of candidates when sampling with non-zero temperature.
    patience: float
        Beam search patience factor.
    """
    # NOTE: the docstring must be the first statement in the class body;
    # placed after the fields it is a discarded expression and never
    # becomes ``__doc__``.
    model_size: str
    lang: str
    is_translate: bool
    beam_size: int
    log_prob_threshold: float
    no_speech_threshold: float
    compute_type: str
    best_of: int
    patience: float