jhj0517 committed on
Commit
f5d473e
·
2 Parent(s): 545761a 0c8c544

Merge branch 'master' into feature/add-bgm-tab

Browse files
README.md CHANGED
@@ -25,6 +25,7 @@ If you wish to try this on Colab, you can do it in [here](https://colab.research
25
  - Translate subtitle files using Facebook NLLB models
26
  - Translate subtitle files using DeepL API
27
  - Pre-processing audio input with [Silero VAD](https://github.com/snakers4/silero-vad).
 
28
  - Post-processing with speaker diarization using the [pyannote](https://huggingface.co/pyannote/speaker-diarization-3.1) model.
29
  - To download the pyannote model, you need to have a Huggingface token and manually accept their terms in the pages below.
30
  1. https://huggingface.co/pyannote/speaker-diarization-3.1
@@ -109,8 +110,6 @@ This is Whisper's original VRAM usage table for models.
109
  - [x] Integrate with faster-whisper
110
  - [x] Integrate with insanely-fast-whisper
111
  - [x] Integrate with whisperX ( Only speaker diarization part )
112
- - [ ] Add background music separation pre-processing with [MVSEP-MDX23](https://github.com/ZFTurbo/MVSEP-MDX23-music-separation-model)
113
  - [ ] Add fast api script
114
  - [ ] Support real-time transcription for microphone
115
-
116
-
 
25
  - Translate subtitle files using Facebook NLLB models
26
  - Translate subtitle files using DeepL API
27
  - Pre-processing audio input with [Silero VAD](https://github.com/snakers4/silero-vad).
28
+ - Pre-processing audio input to separate BGM with [UVR](https://github.com/Anjok07/ultimatevocalremovergui), [UVR-api](https://github.com/NextAudioGen/ultimatevocalremover_api).
29
  - Post-processing with speaker diarization using the [pyannote](https://huggingface.co/pyannote/speaker-diarization-3.1) model.
30
  - To download the pyannote model, you need to have a Huggingface token and manually accept their terms in the pages below.
31
  1. https://huggingface.co/pyannote/speaker-diarization-3.1
 
110
  - [x] Integrate with faster-whisper
111
  - [x] Integrate with insanely-fast-whisper
112
  - [x] Integrate with whisperX ( Only speaker diarization part )
113
+ - [x] Add background music separation pre-processing with [UVR](https://github.com/Anjok07/ultimatevocalremovergui)
114
  - [ ] Add fast api script
115
  - [ ] Support real-time transcription for microphone
 
 
app.py CHANGED
@@ -26,6 +26,7 @@ class App:
26
  whisper_model_dir=self.args.whisper_model_dir,
27
  faster_whisper_model_dir=self.args.faster_whisper_model_dir,
28
  insanely_fast_whisper_model_dir=self.args.insanely_fast_whisper_model_dir,
 
29
  output_dir=self.args.output_dir,
30
  )
31
  self.nllb_inf = NLLBInference(
@@ -269,7 +270,7 @@ class App:
269
  files_subtitles = gr.Files(label="Downloadable output file", scale=3)
270
  btn_openfolder = gr.Button('📂', scale=1)
271
 
272
- params = [mic_input, dd_file_format]
273
 
274
  btn_run.click(fn=self.whisper_inf.transcribe_mic,
275
  inputs=params + whisper_params.as_list(),
 
26
  whisper_model_dir=self.args.whisper_model_dir,
27
  faster_whisper_model_dir=self.args.faster_whisper_model_dir,
28
  insanely_fast_whisper_model_dir=self.args.insanely_fast_whisper_model_dir,
29
+ uvr_model_dir=self.args.uvr_model_dir,
30
  output_dir=self.args.output_dir,
31
  )
32
  self.nllb_inf = NLLBInference(
 
270
  files_subtitles = gr.Files(label="Downloadable output file", scale=3)
271
  btn_openfolder = gr.Button('📂', scale=1)
272
 
273
+ params = [mic_input, dd_file_format, cb_timestamp]
274
 
275
  btn_run.click(fn=self.whisper_inf.transcribe_mic,
276
  inputs=params + whisper_params.as_list(),
modules/ui/htmls.py CHANGED
@@ -38,7 +38,7 @@ CSS = """
38
  """
39
 
40
  MARKDOWN = """
41
- ### [Whisper Web-UI](https://github.com/jhj0517/Whsiper-WebUI)
42
  """
43
 
44
 
 
38
  """
39
 
40
  MARKDOWN = """
41
+ ### [Whisper-WebUI](https://github.com/jhj0517/Whisper-WebUI)
42
  """
43
 
44
 
modules/uvr/music_separator.py CHANGED
@@ -1,4 +1,3 @@
1
- # Credit to Team UVR : https://github.com/Anjok07/ultimatevocalremovergui
2
  from typing import Optional, Union
3
  import numpy as np
4
  import torchaudio
 
 
1
  from typing import Optional, Union
2
  import numpy as np
3
  import torchaudio
modules/whisper/faster_whisper_inference.py CHANGED
@@ -11,7 +11,7 @@ import whisper
11
  import gradio as gr
12
  from argparse import Namespace
13
 
14
- from modules.utils.paths import (FASTER_WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR)
15
  from modules.whisper.whisper_parameter import *
16
  from modules.whisper.whisper_base import WhisperBase
17
 
@@ -20,11 +20,13 @@ class FasterWhisperInference(WhisperBase):
20
  def __init__(self,
21
  model_dir: str = FASTER_WHISPER_MODELS_DIR,
22
  diarization_model_dir: str = DIARIZATION_MODELS_DIR,
 
23
  output_dir: str = OUTPUT_DIR,
24
  ):
25
  super().__init__(
26
  model_dir=model_dir,
27
  diarization_model_dir=diarization_model_dir,
 
28
  output_dir=output_dir
29
  )
30
  self.model_dir = model_dir
 
11
  import gradio as gr
12
  from argparse import Namespace
13
 
14
+ from modules.utils.paths import (FASTER_WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, UVR_MODELS_DIR, OUTPUT_DIR)
15
  from modules.whisper.whisper_parameter import *
16
  from modules.whisper.whisper_base import WhisperBase
17
 
 
20
  def __init__(self,
21
  model_dir: str = FASTER_WHISPER_MODELS_DIR,
22
  diarization_model_dir: str = DIARIZATION_MODELS_DIR,
23
+ uvr_model_dir: str = UVR_MODELS_DIR,
24
  output_dir: str = OUTPUT_DIR,
25
  ):
26
  super().__init__(
27
  model_dir=model_dir,
28
  diarization_model_dir=diarization_model_dir,
29
+ uvr_model_dir=uvr_model_dir,
30
  output_dir=output_dir
31
  )
32
  self.model_dir = model_dir
modules/whisper/insanely_fast_whisper_inference.py CHANGED
@@ -11,7 +11,7 @@ import whisper
11
  from rich.progress import Progress, TimeElapsedColumn, BarColumn, TextColumn
12
  from argparse import Namespace
13
 
14
- from modules.utils.paths import (INSANELY_FAST_WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR)
15
  from modules.whisper.whisper_parameter import *
16
  from modules.whisper.whisper_base import WhisperBase
17
 
@@ -20,12 +20,14 @@ class InsanelyFastWhisperInference(WhisperBase):
20
  def __init__(self,
21
  model_dir: str = INSANELY_FAST_WHISPER_MODELS_DIR,
22
  diarization_model_dir: str = DIARIZATION_MODELS_DIR,
 
23
  output_dir: str = OUTPUT_DIR,
24
  ):
25
  super().__init__(
26
  model_dir=model_dir,
27
  output_dir=output_dir,
28
- diarization_model_dir=diarization_model_dir
 
29
  )
30
  self.model_dir = model_dir
31
  os.makedirs(self.model_dir, exist_ok=True)
 
11
  from rich.progress import Progress, TimeElapsedColumn, BarColumn, TextColumn
12
  from argparse import Namespace
13
 
14
+ from modules.utils.paths import (INSANELY_FAST_WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, UVR_MODELS_DIR, OUTPUT_DIR)
15
  from modules.whisper.whisper_parameter import *
16
  from modules.whisper.whisper_base import WhisperBase
17
 
 
20
  def __init__(self,
21
  model_dir: str = INSANELY_FAST_WHISPER_MODELS_DIR,
22
  diarization_model_dir: str = DIARIZATION_MODELS_DIR,
23
+ uvr_model_dir: str = UVR_MODELS_DIR,
24
  output_dir: str = OUTPUT_DIR,
25
  ):
26
  super().__init__(
27
  model_dir=model_dir,
28
  output_dir=output_dir,
29
+ diarization_model_dir=diarization_model_dir,
30
+ uvr_model_dir=uvr_model_dir
31
  )
32
  self.model_dir = model_dir
33
  os.makedirs(self.model_dir, exist_ok=True)
modules/whisper/whisper_Inference.py CHANGED
@@ -7,7 +7,7 @@ import torch
7
  import os
8
  from argparse import Namespace
9
 
10
- from modules.utils.paths import (WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR)
11
  from modules.whisper.whisper_base import WhisperBase
12
  from modules.whisper.whisper_parameter import *
13
 
@@ -16,12 +16,14 @@ class WhisperInference(WhisperBase):
16
  def __init__(self,
17
  model_dir: str = WHISPER_MODELS_DIR,
18
  diarization_model_dir: str = DIARIZATION_MODELS_DIR,
 
19
  output_dir: str = OUTPUT_DIR,
20
  ):
21
  super().__init__(
22
  model_dir=model_dir,
23
  output_dir=output_dir,
24
- diarization_model_dir=diarization_model_dir
 
25
  )
26
 
27
  def transcribe(self,
 
7
  import os
8
  from argparse import Namespace
9
 
10
+ from modules.utils.paths import (WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR, UVR_MODELS_DIR)
11
  from modules.whisper.whisper_base import WhisperBase
12
  from modules.whisper.whisper_parameter import *
13
 
 
16
  def __init__(self,
17
  model_dir: str = WHISPER_MODELS_DIR,
18
  diarization_model_dir: str = DIARIZATION_MODELS_DIR,
19
+ uvr_model_dir: str = UVR_MODELS_DIR,
20
  output_dir: str = OUTPUT_DIR,
21
  ):
22
  super().__init__(
23
  model_dir=model_dir,
24
  output_dir=output_dir,
25
+ diarization_model_dir=diarization_model_dir,
26
+ uvr_model_dir=uvr_model_dir
27
  )
28
 
29
  def transcribe(self,
modules/whisper/whisper_base.py CHANGED
@@ -251,6 +251,7 @@ class WhisperBase(ABC):
251
  def transcribe_mic(self,
252
  mic_audio: str,
253
  file_format: str,
 
254
  progress=gr.Progress(),
255
  *whisper_params,
256
  ) -> list:
@@ -263,6 +264,8 @@ class WhisperBase(ABC):
263
  Audio file path from gr.Microphone()
264
  file_format: str
265
  Subtitle File format to write from gr.Dropdown(). Supported format: [SRT, WebVTT, txt]
 
 
266
  progress: gr.Progress
267
  Indicator to show progress directly in gradio.
268
  *whisper_params: tuple
@@ -280,6 +283,7 @@ class WhisperBase(ABC):
280
  transcribed_segments, time_for_task = self.run(
281
  mic_audio,
282
  progress,
 
283
  *whisper_params,
284
  )
285
  progress(1, desc="Completed!")
@@ -287,7 +291,7 @@ class WhisperBase(ABC):
287
  subtitle, result_file_path = self.generate_and_write_file(
288
  file_name="Mic",
289
  transcribed_segments=transcribed_segments,
290
- add_timestamp=True,
291
  file_format=file_format,
292
  output_dir=self.output_dir
293
  )
 
251
  def transcribe_mic(self,
252
  mic_audio: str,
253
  file_format: str,
254
+ add_timestamp: bool,
255
  progress=gr.Progress(),
256
  *whisper_params,
257
  ) -> list:
 
264
  Audio file path from gr.Microphone()
265
  file_format: str
266
  Subtitle File format to write from gr.Dropdown(). Supported format: [SRT, WebVTT, txt]
267
+ add_timestamp: bool
268
+ Boolean value from gr.Checkbox() that determines whether to add a timestamp at the end of the filename.
269
  progress: gr.Progress
270
  Indicator to show progress directly in gradio.
271
  *whisper_params: tuple
 
283
  transcribed_segments, time_for_task = self.run(
284
  mic_audio,
285
  progress,
286
+ add_timestamp,
287
  *whisper_params,
288
  )
289
  progress(1, desc="Completed!")
 
291
  subtitle, result_file_path = self.generate_and_write_file(
292
  file_name="Mic",
293
  transcribed_segments=transcribed_segments,
294
+ add_timestamp=add_timestamp,
295
  file_format=file_format,
296
  output_dir=self.output_dir
297
  )
modules/whisper/whisper_factory.py CHANGED
@@ -2,7 +2,7 @@ from typing import Optional
2
  import os
3
 
4
  from modules.utils.paths import (FASTER_WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR,
5
- INSANELY_FAST_WHISPER_MODELS_DIR, WHISPER_MODELS_DIR)
6
  from modules.whisper.faster_whisper_inference import FasterWhisperInference
7
  from modules.whisper.whisper_Inference import WhisperInference
8
  from modules.whisper.insanely_fast_whisper_inference import InsanelyFastWhisperInference
@@ -17,6 +17,7 @@ class WhisperFactory:
17
  faster_whisper_model_dir: str = FASTER_WHISPER_MODELS_DIR,
18
  insanely_fast_whisper_model_dir: str = INSANELY_FAST_WHISPER_MODELS_DIR,
19
  diarization_model_dir: str = DIARIZATION_MODELS_DIR,
 
20
  output_dir: str = OUTPUT_DIR,
21
  ) -> "WhisperBase":
22
  """
@@ -37,6 +38,8 @@ class WhisperFactory:
37
  Directory path for the Insanely Fast Whisper model.
38
  diarization_model_dir : str
39
  Directory path for the diarization model.
 
 
40
  output_dir : str
41
  Directory path where output files will be saved.
42
 
@@ -61,23 +64,27 @@ class WhisperFactory:
61
  return FasterWhisperInference(
62
  model_dir=faster_whisper_model_dir,
63
  output_dir=output_dir,
64
- diarization_model_dir=diarization_model_dir
 
65
  )
66
  elif whisper_type in whisper_typos:
67
  return WhisperInference(
68
  model_dir=whisper_model_dir,
69
  output_dir=output_dir,
70
- diarization_model_dir=diarization_model_dir
 
71
  )
72
  elif whisper_type in insanely_fast_whisper_typos:
73
  return InsanelyFastWhisperInference(
74
  model_dir=insanely_fast_whisper_model_dir,
75
  output_dir=output_dir,
76
- diarization_model_dir=diarization_model_dir
 
77
  )
78
  else:
79
  return FasterWhisperInference(
80
  model_dir=faster_whisper_model_dir,
81
  output_dir=output_dir,
82
- diarization_model_dir=diarization_model_dir
 
83
  )
 
2
  import os
3
 
4
  from modules.utils.paths import (FASTER_WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR,
5
+ INSANELY_FAST_WHISPER_MODELS_DIR, WHISPER_MODELS_DIR, UVR_MODELS_DIR)
6
  from modules.whisper.faster_whisper_inference import FasterWhisperInference
7
  from modules.whisper.whisper_Inference import WhisperInference
8
  from modules.whisper.insanely_fast_whisper_inference import InsanelyFastWhisperInference
 
17
  faster_whisper_model_dir: str = FASTER_WHISPER_MODELS_DIR,
18
  insanely_fast_whisper_model_dir: str = INSANELY_FAST_WHISPER_MODELS_DIR,
19
  diarization_model_dir: str = DIARIZATION_MODELS_DIR,
20
+ uvr_model_dir: str = UVR_MODELS_DIR,
21
  output_dir: str = OUTPUT_DIR,
22
  ) -> "WhisperBase":
23
  """
 
38
  Directory path for the Insanely Fast Whisper model.
39
  diarization_model_dir : str
40
  Directory path for the diarization model.
41
+ uvr_model_dir : str
42
+ Directory path for the UVR model.
43
  output_dir : str
44
  Directory path where output files will be saved.
45
 
 
64
  return FasterWhisperInference(
65
  model_dir=faster_whisper_model_dir,
66
  output_dir=output_dir,
67
+ diarization_model_dir=diarization_model_dir,
68
+ uvr_model_dir=uvr_model_dir
69
  )
70
  elif whisper_type in whisper_typos:
71
  return WhisperInference(
72
  model_dir=whisper_model_dir,
73
  output_dir=output_dir,
74
+ diarization_model_dir=diarization_model_dir,
75
+ uvr_model_dir=uvr_model_dir
76
  )
77
  elif whisper_type in insanely_fast_whisper_typos:
78
  return InsanelyFastWhisperInference(
79
  model_dir=insanely_fast_whisper_model_dir,
80
  output_dir=output_dir,
81
+ diarization_model_dir=diarization_model_dir,
82
+ uvr_model_dir=uvr_model_dir
83
  )
84
  else:
85
  return FasterWhisperInference(
86
  model_dir=faster_whisper_model_dir,
87
  output_dir=output_dir,
88
+ diarization_model_dir=diarization_model_dir,
89
+ uvr_model_dir=uvr_model_dir
90
  )
notebook/whisper-webui.ipynb CHANGED
@@ -58,7 +58,8 @@
58
  "# Temporal bug fix from https://github.com/jhj0517/Whisper-WebUI/issues/256\n",
59
  "!pip install git+https://github.com/JuanBindez/pytubefix.git\n",
60
  "!pip install tokenizers==0.19.1\n",
61
- "!pip install pyannote.audio==3.3.1"
 
62
  ]
63
  },
64
  {
@@ -96,7 +97,7 @@
96
  },
97
  {
98
  "cell_type": "code",
99
- "execution_count": null,
100
  "metadata": {
101
  "id": "PQroYRRZzQiN",
102
  "cellView": "form"
 
58
  "# Temporal bug fix from https://github.com/jhj0517/Whisper-WebUI/issues/256\n",
59
  "!pip install git+https://github.com/JuanBindez/pytubefix.git\n",
60
  "!pip install tokenizers==0.19.1\n",
61
+ "!pip install pyannote.audio==3.3.1\n",
62
+ "!pip install git+https://github.com/jhj0517/ultimatevocalremover_api.git"
63
  ]
64
  },
65
  {
 
97
  },
98
  {
99
  "cell_type": "code",
100
+ "execution_count": 3,
101
  "metadata": {
102
  "id": "PQroYRRZzQiN",
103
  "cellView": "form"