# Anupam251272's picture
# Update app.py
# 74d2202 verified
import os
import gradio as gr
import yt_dlp
import torch
from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
import whisper
import threading
from queue import Queue
import time
# Directory where downloaded model files are cached; created if missing.
MODEL_CACHE = "./models"
os.makedirs(MODEL_CACHE, exist_ok=True)

# Pick the GPU when torch reports one, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"Using device: {device}")

# Speech-to-text: Whisper processor + model, both loaded through MODEL_CACHE.
_WHISPER_CHECKPOINT = "openai/whisper-large-v3-turbo"
processor = AutoProcessor.from_pretrained(_WHISPER_CHECKPOINT, cache_dir=MODEL_CACHE)
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    _WHISPER_CHECKPOINT,
    cache_dir=MODEL_CACHE,
)
model = model.to(device)

# Text summarization pipeline (device=-1 keeps it on the CPU).
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=-1,
)

# Location of the extracted WAV file; /tmp is used for temporary files.
audio_path = "/tmp/temp_audio.wav"
def process_audio(video_id):
    """Download a YouTube video's audio track and transcribe it with Whisper.

    The audio is extracted to WAV inside a per-call temporary directory, so
    concurrent requests cannot overwrite or delete each other's files, and the
    directory is removed automatically whether or not transcription succeeds.

    The audio is transcribed in consecutive 30-second chunks. A single
    ``processor(...)`` + ``model.generate(...)`` call only covers one
    30-second window, which would silently drop the rest of a longer video.

    Args:
        video_id: YouTube video ID (the value of the ``v=`` query parameter).

    Returns:
        The transcript text on success. On any failure, a string that starts
        with ``"Error processing audio: "`` followed by the error details.
    """
    # Function-scope imports: only this function needs these stdlib helpers.
    import tempfile
    from urllib.parse import urlencode

    video_id = (video_id or "").strip()
    if not video_id:
        return "Error processing audio: no video ID provided"

    sample_rate = 16000  # rate passed to the processor for the loaded audio
    chunk_samples = 30 * sample_rate  # one 30-second Whisper window

    try:
        with tempfile.TemporaryDirectory(prefix="yt_audio_") as workdir:
            ydl_opts = {
                "format": "bestaudio/best",
                "quiet": True,
                "outtmpl": os.path.join(workdir, "temp_audio.%(ext)s"),
                "postprocessors": [{"key": "FFmpegExtractAudio", "preferredcodec": "wav"}],
            }
            # URL-encode the ID so stray characters cannot add query parameters.
            url = f"https://www.youtube.com/watch?{urlencode({'v': video_id})}"
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
            # Load the samples into memory before the directory is deleted.
            audio = whisper.load_audio(os.path.join(workdir, "temp_audio.wav"))

        pieces = []
        for start in range(0, len(audio), chunk_samples):
            chunk = audio[start:start + chunk_samples]
            input_features = processor(
                chunk, sampling_rate=sample_rate, return_tensors="pt"
            ).input_features.to(device)
            predicted_ids = model.generate(input_features)
            text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0].strip()
            if text:
                pieces.append(text)
        # NOTE: hard 30 s boundaries may split a word between two chunks.
        return " ".join(pieces)
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error processing audio: {e}"
def summarize_text(text):
    """Summarize a transcript into a short version.

    Args:
        text: Transcript text. ``None`` or an empty string is treated as
            too short.

    Returns:
        The summary text; ``"Transcript too short for summarization."`` when
        the input has fewer than 10 whitespace-separated words; or
        ``"Summarization failed"`` if the summarizer raises.
    """
    if not text or len(text.split()) < 10:
        return "Transcript too short for summarization."
    try:
        # truncation=True clips over-long transcripts to the model's input
        # limit instead of letting the call fail; only the clipped part is
        # summarized.
        summary = summarizer(
            text,
            max_length=50,
            min_length=10,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        # Keep the user-facing message stable, but do not hide the cause.
        print(f"Summarization failed: {e}")
        return "Summarization failed"
# Gradio UI: one text input for the video ID, a button, and three outputs
# (embedded player, transcript, summary).
with gr.Blocks() as demo:
    gr.Markdown("# 🎥 YouTube Real-Time Transcriber")
    video_input = gr.Textbox(label="Enter YouTube Video ID")
    process_button = gr.Button("Transcribe")
    video_embed = gr.HTML()
    transcript_output = gr.Textbox(label="Live Transcript", interactive=False)
    summary_output = gr.Textbox(label="Summary", interactive=False)

    def handle_video(video_id):
        """Build the player embed, transcribe the video and summarize it.

        Args:
            video_id: Raw text from the "Enter YouTube Video ID" textbox.

        Returns:
            A ``(embed_html, transcript, summary)`` tuple matching the three
            output components. ``summary`` is empty when there is no
            transcript or when transcription reported an error.
        """
        from urllib.parse import quote

        video_id = (video_id or "").strip()
        if not video_id:
            return "", "Please enter a YouTube video ID.", ""

        # Percent-encode the user-supplied ID so quotes or angle brackets
        # cannot break out of the src attribute and inject markup.
        safe_id = quote(video_id, safe="")
        embed_html = f"""<iframe width='560' height='315' src='https://www.youtube.com/embed/{safe_id}' frameborder='0' allowfullscreen></iframe>"""

        transcript = process_audio(video_id)
        # process_audio reports failures as text with this prefix; an error
        # message should be shown, not summarized.
        if not transcript or transcript.startswith("Error processing audio:"):
            summary = ""
        else:
            summary = summarize_text(transcript)
        return embed_html, transcript, summary

    process_button.click(
        handle_video,
        inputs=[video_input],
        outputs=[video_embed, transcript_output, summary_output],
    )

demo.launch(debug=True)