File size: 4,233 Bytes
068b7da
 
 
 
6d72e83
f274110
cd428c2
 
068b7da
 
 
 
 
 
6d72e83
 
 
 
 
 
609ffca
f274110
 
 
 
 
 
 
 
 
 
84607d1
cd428c2
068b7da
f274110
 
84607d1
f274110
 
 
 
84607d1
 
 
 
 
 
 
6d72e83
 
 
 
 
 
 
 
 
f274110
6d72e83
 
 
f274110
068b7da
6d72e83
f274110
068b7da
 
6d72e83
 
 
 
84607d1
068b7da
 
 
 
c92f138
e185191
 
 
 
 
 
 
 
 
 
068b7da
6d72e83
 
 
 
068b7da
 
f274110
6d72e83
 
 
f274110
6d72e83
f274110
84607d1
e185191
84607d1
 
e185191
 
068b7da
 
f274110
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import gradio as gr
import torch
from TTS.api import TTS
import os
import librosa
import requests
from datetime import datetime

# Select the compute device: use the GPU when CUDA is available, else the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the FreeVC voice-conversion model once at import time and move it to
# the selected device so every request reuses the same instance.
tts = TTS(
    model_name="voice_conversion_models/multilingual/vctk/freevc24",
    progress_bar=False,
).to(device)

def convert_audio_to_wav(file_path):
    """Decode an audio file with librosa and re-save it as a WAV file.

    Args:
        file_path: Path to an audio file in any format ``librosa.load`` can
            read (wav, mp3, etc.).

    Returns:
        Path to a newly created temporary ``.wav`` file containing the decoded
        audio at its native sample rate. The file is not deleted
        automatically; the caller owns it.
    """
    import tempfile

    from scipy.io import wavfile

    # sr=None keeps the file's native sample rate instead of resampling.
    audio, sr = librosa.load(file_path, sr=None)

    # Use a unique path per call. A single fixed name meant that converting
    # the target voice and then the input audio overwrote the first result,
    # so both paths ended up pointing at the same recording.
    with tempfile.NamedTemporaryFile(
        prefix="temp_input_", suffix=".wav", delete=False
    ) as tmp:
        output_path = tmp.name

    # librosa.output.write_wav was removed in librosa 0.8; write the samples
    # with SciPy instead. The sample rate must be an integer for the header.
    wavfile.write(output_path, int(sr), audio)
    return output_path

def upload_to_file_io(file_path):
    """Upload a file to file.io and return the temporary download link.

    The upload is best-effort: any failure (unreadable file, network error,
    timeout, non-200 response, malformed JSON) yields ``None`` rather than an
    exception, so a logging upload can never abort the caller.

    Args:
        file_path: Path of the file to upload.

    Returns:
        The ``link`` value from the service's JSON response, or ``None`` when
        the upload failed or the response carried no link.
    """
    url = "https://file.io"
    try:
        with open(file_path, 'rb') as f:
            # Without a timeout a stalled connection would block the request
            # handler indefinitely.
            response = requests.post(url, files={"file": f}, timeout=30)
    except (OSError, requests.RequestException):
        return None

    if response.status_code != 200:
        return None

    try:
        payload = response.json()
    except ValueError:
        # Body was not valid JSON.
        return None

    if not isinstance(payload, dict):
        return None
    return payload.get('link')

def voice_conversion(input_audio, target_voice, uploaded_target_voice, check_duration=True):
    """Convert the voice in ``input_audio`` to sound like a target voice.

    Args:
        input_audio: Filesystem path of the source recording.
        target_voice: File name of an example inside the ``Examples`` folder;
            used only when ``uploaded_target_voice`` is ``None``.
        uploaded_target_voice: Optional filesystem path of a user-supplied
            target voice. Takes precedence over ``target_voice``.
        check_duration: When true, reject source audio longer than 120 seconds.

    Returns:
        Path of the WAV file containing the converted audio (``output.wav``).

    Raises:
        gr.Error: If no input audio was supplied, the input is too long, or
            the selected example voice does not exist. Raising (rather than
            returning the message) matters because the result feeds an audio
            component that would treat a returned string as a file path.
    """
    print(datetime.now())
    output_path = "output.wav"

    # The audio component passes None when nothing was recorded or uploaded.
    if not input_audio:
        raise gr.Error("Error: No input audio provided.")

    # Check audio duration if the flag is True
    if check_duration:
        duration = librosa.get_duration(filename=input_audio)
        if duration > 120:
            raise gr.Error("Error: Audio file exceeds 2 minutes.")

    # Upload input audio to file.io and log the link
    # NOTE(review): this sends the user's recording to a third-party service;
    # confirm that is intended and disclosed to users.
    input_file_link = upload_to_file_io(input_audio)
    if input_file_link:
        print(f"Input file uploaded to: {input_file_link}")  # Log the input file link to the terminal
    else:
        print("Error uploading the input file to file.io")

    # Check if the user uploaded a target voice, otherwise use selected from examples
    if uploaded_target_voice is not None:
        target_voice_path = uploaded_target_voice
        if not uploaded_target_voice.endswith(".wav"):
            target_voice_path = convert_audio_to_wav(uploaded_target_voice)
    else:
        # target_voice may be None when no example is selected; os.path.join
        # would raise TypeError on it, so treat it like a missing file.
        if not target_voice:
            raise gr.Error("Error: Target voice file not found.")
        target_voice_path = os.path.join("Examples", target_voice)
        if not os.path.exists(target_voice_path):
            raise gr.Error("Error: Target voice file not found.")

    # Convert input audio to wav if necessary
    if not input_audio.endswith(".wav"):
        input_audio = convert_audio_to_wav(input_audio)

    # Perform voice conversion
    tts.voice_conversion_to_file(source_wav=input_audio, target_wav=target_voice_path, file_path=output_path)

    return output_path

# Get examples from Examples folder
examples_folder = "Examples/"
# os.listdir returns entries in arbitrary order, so sort them to keep the
# list (and whichever entry is used as a default) stable between runs.
# A missing folder yields an empty list instead of crashing at import time.
example_files = (
    sorted(f for f in os.listdir(examples_folder) if f.endswith(".wav"))
    if os.path.isdir(examples_folder)
    else []
)

# Define Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("## Voice Conversion using Coqui TTS")

    with gr.Row():
        input_audio = gr.Audio(label="Record or Upload Your Voice", type="filepath")
        target_voice = gr.Dropdown(
            choices=example_files,
            label="Select Target Voice from Examples",
            # Indexing an empty list would raise IndexError while building the
            # UI; fall back to no preselected value when there are no examples.
            value=example_files[0] if example_files else None,
            info="Located in Examples/ folder"
        )
        uploaded_target_voice = gr.Audio(
            label="Or Upload Your Own Target Voice",
            type="filepath"
        )

    with gr.Row():
        play_button = gr.Button("Preview Selected Target Voice")
        preview_audio = gr.Audio(label="Preview Target Voice", type="filepath")

    convert_button = gr.Button("Convert Voice")
    output_audio = gr.Audio(label="Converted Voice", type="filepath")

    # Preview button for listening to the selected target voice from examples
    def preview_target_voice(selected_target_voice):
        """Return the path of the selected example voice, or None if none is selected."""
        if not selected_target_voice:
            # Nothing selected: return None rather than failing in os.path.join.
            return None
        return os.path.join(examples_folder, selected_target_voice)

    play_button.click(preview_target_voice, inputs=[target_voice], outputs=preview_audio)

    # Conversion process with duration restriction (enabled by default) and file.io input upload.
    # The lambda parameters are named differently from the components above so
    # they do not shadow them.
    convert_button.click(
        lambda source_path, example_name, uploaded_path: voice_conversion(
            source_path, example_name, uploaded_path, check_duration=True
        ),
        inputs=[input_audio, target_voice, uploaded_target_voice],
        outputs=output_audio
    )

# Start the Gradio server. share=True is the only option passed; it asks
# Gradio for a share link in addition to the local URL.
demo.launch(share=True)