|
import html
import os
import tempfile
import threading
import time
from queue import Queue

import gradio as gr
import torch
import whisper
import yt_dlp
from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
|
|
|
|
|
# Directory handed to from_pretrained() as cache_dir for the Whisper files.
MODEL_CACHE = "./models"
os.makedirs(MODEL_CACHE, exist_ok=True)

# Run speech recognition on the GPU when CUDA is available, else on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Whisper checkpoint shared by the processor and the model.
_WHISPER_CHECKPOINT = "openai/whisper-large-v3-turbo"

# The processor turns raw audio into model input features and decodes the
# generated token ids back into text.
processor = AutoProcessor.from_pretrained(_WHISPER_CHECKPOINT, cache_dir=MODEL_CACHE)

# Load the speech-to-text model, then move it onto the selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    _WHISPER_CHECKPOINT,
    cache_dir=MODEL_CACHE,
)
model = model.to(device)

# Summarization pipeline; device=-1 is passed regardless of the `device`
# chosen above (-1 selects the CPU in the transformers pipeline API).
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=-1,
)

# Path of the WAV file produced by the audio download step.
audio_path = "/tmp/temp_audio.wav"
|
|
|
def process_audio(video_id):
    """Download a YouTube video's audio track and transcribe it with Whisper.

    The audio is downloaded with yt-dlp into a private temporary directory,
    converted to WAV by the FFmpegExtractAudio post-processor, loaded with
    ``whisper.load_audio`` and transcribed in consecutive 30-second chunks.

    Args:
        video_id: The YouTube video ID, i.e. the value placed after
            ``watch?v=`` in the video URL. Surrounding whitespace is ignored.

    Returns:
        The transcript as a single string. On any failure a string starting
        with ``"Error processing audio:"`` is returned instead of raising,
        so the result can always be shown directly in the UI.
    """
    sample_rate = 16000  # sampling rate passed to the processor
    chunk_samples = 30 * sample_rate  # one 30-second Whisper window

    video_id = (video_id or "").strip()
    if not video_id:
        return "Error processing audio: no video ID provided"

    try:
        # A fresh directory per call means a leftover intermediate download
        # from an earlier failed run can never be mistaken for this video's
        # audio, and everything (not only the .wav) is removed afterwards.
        with tempfile.TemporaryDirectory() as workdir:
            # '%' is special in yt-dlp output templates, so escape any that
            # happen to appear in the temporary directory path.
            out_template = os.path.join(workdir.replace("%", "%%"), "audio.%(ext)s")
            ydl_opts = {
                "format": "bestaudio/best",
                "quiet": True,
                "outtmpl": out_template,
                "postprocessors": [{"key": "FFmpegExtractAudio", "preferredcodec": "wav"}],
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([f"https://www.youtube.com/watch?v={video_id}"])

            # Fully decoded into memory, so the files can go once we leave
            # the `with` block.
            audio = whisper.load_audio(os.path.join(workdir, "audio.wav"))

        # The Whisper processor pads/truncates its input to a single
        # 30-second window, so feeding the whole recording at once would
        # silently drop everything after the first 30 seconds. Transcribe
        # window by window instead. NOTE: windows are cut at fixed offsets,
        # so a word spanning a boundary may be split.
        pieces = []
        for start in range(0, len(audio), chunk_samples):
            chunk = audio[start:start + chunk_samples]
            input_features = processor(
                chunk, sampling_rate=sample_rate, return_tensors="pt"
            ).input_features.to(device)
            predicted_ids = model.generate(input_features)
            text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0].strip()
            if text:
                pieces.append(text)

        return " ".join(pieces)
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"Error processing audio: {e}"
|
|
|
def summarize_text(text):
    """Summarize a transcript into a short version.

    Args:
        text: The transcript to summarize. ``None`` and empty strings are
            treated as too short.

    Returns:
        The generated summary; ``"Transcript too short for summarization."``
        when the input has fewer than 10 whitespace-separated words; or a
        string starting with ``"Summarization failed"`` (followed by the
        reason) when the summarizer raises.
    """
    if not text or len(text.split()) < 10:
        return "Transcript too short for summarization."

    try:
        # truncation=True clips the input to the model's maximum input
        # length; without it a long transcript makes the summarizer fail.
        # Only the beginning of a very long transcript is summarized.
        summary = summarizer(
            text,
            max_length=50,
            min_length=10,
            do_sample=False,
            truncation=True,
        )
        return summary[0]["summary_text"]
    except Exception as e:
        # Surface the reason instead of discarding it, mirroring how
        # process_audio reports its errors.
        return f"Summarization failed: {e}"
|
|
|
# Gradio UI: a text box for the video ID, a button, the embedded player and
# two read-only text boxes for the transcript and its summary.
with gr.Blocks() as demo:
    gr.Markdown("# 🎥 YouTube Real-Time Transcriber")

    video_input = gr.Textbox(label="Enter YouTube Video ID")
    process_button = gr.Button("Transcribe")
    video_embed = gr.HTML()
    transcript_output = gr.Textbox(label="Live Transcript", interactive=False)
    summary_output = gr.Textbox(label="Summary", interactive=False)

    def handle_video(video_id):
        """Build the player embed, transcript and summary for one video.

        Args:
            video_id: Text typed into the "Enter YouTube Video ID" box.

        Returns:
            A ``(embed_html, transcript, summary)`` tuple matching the three
            output components wired up below.
        """
        video_id = (video_id or "").strip()
        if not video_id:
            # Nothing to embed or download.
            return "", "Error processing audio: no video ID provided", ""

        # The ID is user-supplied text placed inside an HTML attribute, so
        # escape it to stop quotes/angle brackets from injecting markup.
        safe_id = html.escape(video_id, quote=True)
        embed_html = f"""<iframe width='560' height='315' src='https://www.youtube.com/embed/{safe_id}' frameborder='0' allowfullscreen></iframe>"""

        transcript = process_audio(video_id)

        # process_audio reports failures as text; do not summarize an error
        # message as if it were a transcript.
        if not transcript or transcript.startswith("Error processing audio:"):
            summary = ""
        else:
            summary = summarize_text(transcript)

        return embed_html, transcript, summary

    process_button.click(
        handle_video,
        inputs=[video_input],
        outputs=[video_embed, transcript_output, summary_output],
    )

demo.launch(debug=True)
|
|