Spaces:
Running
Running
File size: 5,848 Bytes
eeb01ec b597f21 495e2d9 cfd5440 d728ac6 eeb01ec b597f21 d728ac6 b597f21 d728ac6 b597f21 cfd5440 b597f21 cfd5440 b597f21 d728ac6 cfd5440 b597f21 495e2d9 b597f21 d728ac6 b597f21 cfd5440 b597f21 cfd5440 b597f21 d728ac6 eeb01ec 9959a9b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import gradio as gr
import pyperclip
import urllib.parse as urlparse
from pytube import YouTube
import re
import subprocess
from lang_list import ORIGINAL_LANGUAGE_NAME_TO_CODE, S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES
import torch
from seamless_communication.models.inference import Translator
import time
# Identifiers of the supported streaming sites. is_valid_url stores one of them
# in the hidden "Stream page" textbox and get_audio_from_video compares against
# them to pick the download strategy.
YOUTUBE = "youtube"
TWITCH = "twitch"
# Initialize a Translator object with a multitask model, vocoder on the GPU.
# NOTE: the line below is commented out, so no translator is created here.
# translator = Translator("seamlessM4T_large", vocoder_name_or_card="vocoder_36langs", device=torch.device("cuda:0"))
def copy_url_from_clipboard():
    """Return the text currently held in the clipboard.

    Used as the click handler of the copy button; the returned text becomes
    the new value of the video URL textbox.
    """
    clipboard_text = pyperclip.paste()
    return clipboard_text
def clear_video_url() -> str:
    """Return an empty string, used to blank the video URL textbox."""
    empty_url = ""
    return empty_url
def get_youtube_thumbnail(video_id):
    """Build the img.youtube.com thumbnail URL (image ``0.jpg``) for *video_id*.

    The id is interpolated as-is; no validation or escaping is performed.
    """
    return f"https://img.youtube.com/vi/{video_id}/0.jpg"
def get_youtube_video_id(url):
    """Return the thumbnail URL of the YouTube video referenced by *url*.

    Despite its name, this function does not return the video id itself: it
    extracts the id and passes it to ``get_youtube_thumbnail``.

    Recognised URL shapes:
      * ``...?v=<id>`` (regular watch pages),
      * ``youtu.be/<id>`` short links,
      * ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>`` and ``/v/<id>`` paths.

    Args:
        url: The video URL as typed by the user; the scheme may be missing.

    Returns:
        The thumbnail URL, or ``None`` when no video id can be found.
    """
    parsed_url = urlparse.urlparse(url)

    # Regular watch URL: the id is the first value of the "v" query parameter.
    query_ids = urlparse.parse_qs(parsed_url.query).get("v")
    if query_ids:
        return get_youtube_thumbnail(query_ids[0])

    # Without a scheme ("youtu.be/<id>") urlparse puts the host into the path;
    # re-parse as a network-path reference so host and path are separated.
    if not parsed_url.netloc:
        try:
            parsed_url = urlparse.urlparse("//" + url)
        except ValueError:
            return None

    host = (parsed_url.hostname or "").lower()
    segments = [segment for segment in parsed_url.path.split("/") if segment]

    video_id = None
    if host == "youtu.be" or host.endswith(".youtu.be"):
        # Short link: the id is the first path segment.
        if segments:
            video_id = segments[0]
    elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
        # Path-style URL: the id follows the marker segment.
        video_id = segments[1]

    if video_id:
        return get_youtube_thumbnail(video_id)
    return None
def is_valid_url(url):
    """Build the UI updates that match the video URL typed by the user.

    Despite its name, this function does not return a boolean. It is the
    ``change`` handler of the URL textbox and always returns a 7-tuple of
    component updates, in the order expected by that handler's outputs:
    ``(image, source_languaje, target_languaje, translate_button,
    stream_page, original_audio, translated_audio)``.

    * YouTube URL with a resolvable video id: the video thumbnail is shown
      and the hidden stream-page textbox is set to ``YOUTUBE``.
    * Twitch URL: the bundled Twitch image is shown and the hidden
      stream-page textbox is set to ``TWITCH``.
    * Anything else (empty text, unsupported site, YouTube URL without a
      video id): every control is hidden and the stream page is cleared.
      Previously this case returned ``None`` implicitly, which does not match
      the seven outputs of the event handler.

    Args:
        url: Current content of the video URL textbox.

    Returns:
        Tuple of seven Gradio components used as updates.
    """
    lowered_url = (url or "").lower()

    if "youtube" in lowered_url or "youtu.be" in lowered_url:
        preview_image = get_youtube_video_id(url)  # thumbnail URL or None
        stream_page_value = YOUTUBE
    elif "twitch" in lowered_url:
        # "twitch.tv" always contains "twitch", so one membership test suffices.
        preview_image = "assets/twitch.webp"
        stream_page_value = TWITCH
    else:
        preview_image = None
        stream_page_value = ""

    if not preview_image:
        # Unsupported or incomplete URL: hide the controls and forget any
        # previously detected stream page.
        return (
            gr.Image(visible=False),
            gr.Dropdown(visible=False),
            gr.Dropdown(visible=False),
            gr.Button(visible=False),
            gr.Textbox(value="", label="Stream page", elem_id="stream_page", visible=False),
            gr.Audio(visible=False),
            gr.Audio(visible=False),
        )

    source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=ORIGINAL_LANGUAGE_NAME_TO_CODE, scale=1, interactive=True)
    target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES, scale=1, interactive=True)
    button = gr.Button(size="sm", value="translate", min_width="10px", scale=0, visible=True)
    original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=True, interactive=False)
    translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=True)

    return (
        gr.Image(value=preview_image, visible=True, show_download_button=False, container=False),
        source_languaje,
        target_languaje,
        button,
        # The stream page stays hidden; it only carries state to the
        # translate button handler.
        gr.Textbox(value=stream_page_value, label="Stream page", elem_id="stream_page", visible=False),
        original_audio,
        translated_audio,
    )
def get_audio_from_video(url, stream_page):
    """Download the audio track of the video at *url* and return it as audio.

    Args:
        url: Video URL typed by the user.
        stream_page: Site identifier, ``YOUTUBE`` or ``TWITCH``.

    Returns:
        A ``gr.Audio`` update pointing at the downloaded file
        (``audio.mp3`` for YouTube, ``audio.mkv`` for Twitch), or ``None``
        when *stream_page* is neither of the supported identifiers.

    Raises:
        gr.Error: When no suitable audio stream exists, the Twitch video id
            cannot be found in *url*, or the ``twitch-dl`` download fails.
    """
    if stream_page == YOUTUBE:
        yt = YouTube(url)
        audio_streams = yt.streams.filter(mime_type="audio/mp4")

        def bitrate_kbps(stream):
            # ``abr`` is text such as "128kbps" (or None); compare numerically,
            # because sorting the raw strings would rank "48kbps" above "128kbps".
            digits = re.sub(r"\D", "", stream.abr or "")
            return int(digits) if digits else -1

        # Pick the stream with the highest audio bitrate.
        audio_stream = max(audio_streams, key=bitrate_kbps, default=None)
        if audio_stream is None:
            raise gr.Error("No audio/mp4 stream is available for this YouTube video.")

        # NOTE(review): the stream is audio/mp4 but is saved with an .mp3 name;
        # kept unchanged so the output path stays the same.
        audio_stream.download(filename="audio.mp3")
        return gr.Audio("audio.mp3", label="Original audio", elem_id="original_audio", visible=True)

    if stream_page == TWITCH:
        # Prefer the id that follows "/videos/"; otherwise fall back to the
        # first run of ten digits anywhere in the URL.
        videos_match = re.search(r"/videos/(\d+)", url)
        if videos_match:
            video_id = videos_match.group(1)
        else:
            digits_match = re.search(r"\d{10}", url)
            if digits_match is None:
                raise gr.Error("Could not find a Twitch video id in the URL.")
            video_id = digits_match.group(0)

        # Download only the audio rendition of the video.
        command = ["twitch-dl", "download", "--overwrite", "-q", "audio_only", "--output", "audio.mkv", video_id]
        try:
            subprocess.run(command, check=True)
        except FileNotFoundError as err:
            raise gr.Error("The twitch-dl command is not installed.") from err
        except subprocess.CalledProcessError as err:
            raise gr.Error(f"twitch-dl failed with exit code {err.returncode}.") from err
        return gr.Audio("audio.mkv", label="Original audio", elem_id="original_audio", visible=True)

    # Unknown stream page: nothing to download.
    return None
# def translate_audio(input_audio, target_languaje):
# print("Translating audio...")
# time.sleep(5)
# print("Translating audio...")
# _, wav, _ = translator.predict(input_audio, "s2st", target_languaje)
# return gr.Audio(wav, label="Translated audio", elem_id="translated_audio", visible=True)
# Build the Gradio interface and start the app.
# NOTE(review): the nesting of the layout blocks below was reconstructed from
# a source that had lost its indentation -- confirm against the intended layout.
with gr.Blocks() as demo:
    # Top row: URL input plus the paste-from-clipboard and clear buttons.
    with gr.Row(variant="panel"):
        url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
        copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
        delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
        copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
        delete_button.click(fn=clear_video_url, outputs=url_textbox)

    # Hidden textbox carrying the detected site from is_valid_url to
    # get_audio_from_video.
    stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)

    # Everything below starts hidden; is_valid_url reveals it for supported URLs.
    visible = False
    with gr.Row(equal_height=False):
        image = gr.Image(visible=visible, scale=1)
        with gr.Column():
            with gr.Row():
                source_languaje = gr.Dropdown(visible=visible, label="Source languaje", choices=ORIGINAL_LANGUAGE_NAME_TO_CODE, scale=1, interactive=True)
                target_languaje = gr.Dropdown(visible=visible, label="Target languaje", choices=S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES, scale=1, interactive=True)
                translate_button = gr.Button(size="lg", value="translate", min_width="10px", visible=visible)
            original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible)
            translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=visible)

    # Typing a URL updates the preview and controls; the translate button
    # downloads the audio of the referenced video.
    url_textbox.change(fn=is_valid_url, inputs=url_textbox, outputs=[image, source_languaje, target_languaje, translate_button, stream_page, original_audio, translated_audio])
    translate_button.click(fn=get_audio_from_video, inputs=[url_textbox, stream_page], outputs=original_audio)
    # original_audio.change(fn=translate_audio, inputs=[original_audio, target_languaje], outputs=translated_audio)

demo.launch()