"""Streamlit app: download a TikTok video and have a Gemini agent narrate it."""

import os
import shutil
import tempfile
import time
from pathlib import Path

import google.generativeai as genai
import streamlit as st
import yt_dlp
from dotenv import load_dotenv
from google.generativeai import get_file, upload_file
from phi.agent import Agent
from phi.model.google import Gemini
from phi.tools.duckduckgo import DuckDuckGo

load_dotenv()

API_KEY = os.getenv("GOOGLE_API_KEY")
if API_KEY:
    genai.configure(api_key=API_KEY)

# File name used for the downloaded video inside its per-request directory.
VIDEO_FILENAME = "tiktok_video.mp4"
# Seconds to wait between polls of the uploaded file's processing state.
PROCESSING_POLL_SECONDS = 1
# Upper bound on how long to wait for the uploaded file to leave "PROCESSING".
PROCESSING_TIMEOUT_SECONDS = 300

# Prompt sent to the agent together with the uploaded video.
ANALYSIS_PROMPT = '''
Analyze the video thoroughly and provide a comprehensive storyline. The total generation should not be more than 500 words. As a whole from the video, detail the following aspects:
1. **Story:** How the set scene introduced and tone is set. What is happening in the scene? Describe key visuals and actions.
2. **Characters**: Identify each character, noting their expressions, attire, actions, and interactions. Highlight emotional nuances and gestures.
3. **Narration or Voiceover**: Describe what types of narrations or voiceovers are used in the video.
4. **Transitions and Pacing**: Describe scene changes, including cuts, fades, zooms, or shifts in focus, and how they affect the flow.
5. **Mood and Tone**: Capture the overall mood and tone of each scene, mentioning any music or sound effects that enhance these elements.
6. **Visible Texts or Brandings**: Capture all the visible texts from the videos and brandings too only if some brandings of product or service is done.
Conclude with a cohesive summary that ties the scenes together, highlighting all promotional themes, ensuring each brand and key text is accurately referenced in the storyline.
'''

# Page Configuration (must stay the first Streamlit command in the script).
st.set_page_config(
    page_title="Agentic Video Captioning Platform::",
    layout="wide",
)

st.title("Generate the story of videos:")
st.header("Agentic Video Captioning Platform:")


@st.cache_resource
def initialize_agent():
    """Build the Gemini-backed agent; cached by Streamlit across reruns."""
    return Agent(
        name="Video AI summarizer",
        model=Gemini(id="gemini-2.0-flash-exp"),
        tools=[DuckDuckGo()],
        markdown=True,
    )


# Initialize the agent
multimodal_Agent = initialize_agent()


def download_tiktok_video(url: str, output_path: str):
    """Download a TikTok video with yt-dlp.

    Args:
        url: Address of the video to download.
        output_path: Output template / path handed to yt-dlp as ``outtmpl``.

    Returns:
        A ``(success, message)`` tuple. On success ``message`` is a
        confirmation string; on failure it is the text of the exception.
    """
    ydl_opts = {
        'outtmpl': output_path,  # Path where the video will be saved
        'format': 'best',        # Download the best quality available
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception as e:
        # Deliberately broad: any failure is reported to the caller as text.
        return False, str(e)
    return True, f"Video downloaded successfully to {output_path}"


def _wait_until_processed(
    video,
    timeout: float = PROCESSING_TIMEOUT_SECONDS,
    poll_interval: float = PROCESSING_POLL_SECONDS,
):
    """Poll an uploaded file until it is no longer in the PROCESSING state.

    Args:
        video: File object returned by ``upload_file``.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls.

    Returns:
        The refreshed file object once processing has finished.

    Raises:
        TimeoutError: The file is still PROCESSING after ``timeout`` seconds.
        RuntimeError: The file ended in the FAILED state.
    """
    deadline = time.monotonic() + timeout
    while video.state.name == "PROCESSING":
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Video processing did not finish within {timeout} seconds"
            )
        time.sleep(poll_interval)
        video = get_file(video.name)
    if video.state.name == "FAILED":
        raise RuntimeError("Video processing failed on the server")
    return video


# URL input for TikTok video
video_url = st.text_input("Enter TikTok video URL")

if st.button('Generate the story'):
    cleaned_url = video_url.strip()
    if not cleaned_url:
        st.warning('PLEASE ENTER A VALID URL')
    else:
        # A fresh directory per request: avoids collisions between sessions
        # and guarantees no stale file from an earlier run is picked up.
        download_dir = tempfile.mkdtemp(prefix="tiktok_")
        temp_video_path = str(Path(download_dir) / VIDEO_FILENAME)
        success = False
        try:
            # Download the TikTok video
            with st.spinner("Downloading video..."):
                success, message = download_tiktok_video(
                    cleaned_url, temp_video_path
                )

            if not success:
                st.error(f"Error downloading video: {message}")
            else:
                st.success(message)

                # Display the video
                st.video(temp_video_path, format="video/mp4", start_time=0)

                try:
                    with st.spinner("Generating the story of the video"):
                        # Upload and process the video
                        processed_video = _wait_until_processed(
                            upload_file(temp_video_path)
                        )

                        # AI agent processing
                        response = multimodal_Agent.run(
                            ANALYSIS_PROMPT, videos=[processed_video]
                        )

                    st.subheader('Analysis Result')
                    st.markdown(response.content)
                except Exception as error:
                    st.error(f"An error occurred: {error}")
        finally:
            # Remove the download directory (including any partial files)
            # whether or not the download or the analysis succeeded.
            try:
                shutil.rmtree(download_dir)
                if success:
                    st.info("Downloaded TikTok video deleted.")
            except OSError as e:
                st.warning(f"Failed to delete video: {e}")