Spaces:

DvorakInnovationAI
/

Video-Captioning

Running

File size: 4,430 Bytes

import streamlit as st 
from phi.agent import Agent
from phi.model.google import Gemini
from phi.tools.duckduckgo import DuckDuckGo
from google.generativeai import upload_file, get_file
import time
import google.generativeai as genai
from pathlib import Path
import tempfile
import os
import yt_dlp

from dotenv import load_dotenv
load_dotenv()

# Read the Gemini key from the process environment (load_dotenv() has already
# run) and configure the SDK only when a non-empty key is present.
API_KEY = os.environ.get("GOOGLE_API_KEY")
if API_KEY:
    genai.configure(api_key=API_KEY)

# Streamlit page setup: page title and a wide layout.
st.set_page_config(page_title="Agentic Video Captioning Platform::", layout="wide")

# Headings shown at the top of the page.
st.title("Generate the story of videos:")
st.header("Agentic Video Captioning Platform:")

@st.cache_resource
def initialize_agent():
    """Build the agent used to analyse videos.

    The agent pairs a Gemini model ("gemini-2.0-flash-exp") with the
    DuckDuckGo tool and is asked to produce markdown output. The function is
    decorated with ``st.cache_resource``.

    Returns:
        The configured ``Agent`` instance.
    """
    gemini_model = Gemini(id="gemini-2.0-flash-exp")
    agent_tools = [DuckDuckGo()]
    agent = Agent(
        name="Video AI summarizer",
        model=gemini_model,
        tools=agent_tools,
        markdown=True,
    )
    return agent


# Module-level agent instance used by the rest of the script.
multimodal_Agent = initialize_agent()

def download_tiktok_video(url: str, output_path: str) -> "tuple[bool, str]":
    """Download a TikTok video with yt-dlp.

    Args:
        url: Address of the video to download. Empty or whitespace-only
            values are rejected without invoking yt-dlp.
        output_path: Value passed to yt-dlp as its ``outtmpl`` option, i.e.
            where the video is written.

    Returns:
        A ``(success, message)`` tuple. On success ``message`` names the
        output path; on failure it carries the error text. Download errors
        are reported through the tuple rather than raised.
    """
    if not url or not url.strip():
        return False, "No video URL provided"

    ydl_opts = {
        'outtmpl': output_path,  # Path where the video will be saved
        'format': 'best',        # Download the best quality available
        # Without this, yt-dlp keeps a video file that already exists at the
        # output path, so a stale file would be reported as a fresh download.
        'overwrites': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url.strip()])
    except Exception as e:
        # Deliberately broad: callers rely on the (False, message) result
        # instead of handling exceptions themselves.
        return False, str(e)
    return True, f"Video downloaded successfully to {output_path}"

# Prompt sent to the agent together with the uploaded video. The text,
# including its surrounding whitespace, is kept exactly as originally written.
ANALYSIS_PROMPT = (
    '''
Analyze the video thoroughly and provide a comprehensive storyline. The total generation should not be more than 500 words. As a whole from the video, detail the following aspects:

1. **Story:** How the set scene introduced and tone is set. What is happening in the scene? Describe key visuals and actions. 
2. **Characters**: Identify each character, noting their expressions, attire, actions, and interactions. Highlight emotional nuances and gestures.
1. **Narration or Voiceover**: Describe what types of narrations or voiceovers are used in the video.  
3. **Transitions and Pacing**: Describe scene changes, including cuts, fades, zooms, or shifts in focus, and how they affect the flow.
4. **Mood and Tone**: Capture the overall mood and tone of each scene, mentioning any music or sound effects that enhance these elements.
5. **Visible Texts or Brandings**: Capture all the visible texts from the videos and brandings too only if some brandings of product or service is done.

Conclude with a cohesive summary that ties the scenes together, highlighting all promotional themes, ensuring each brand and key text is accurately referenced in the storyline.
                        '''
)

# Limits for waiting on an uploaded file to finish processing, so a stuck
# upload cannot block the page forever.
PROCESSING_TIMEOUT_SECONDS = 300
PROCESSING_POLL_SECONDS = 1


def _wait_for_file_processing(uploaded_file):
    """Poll an uploaded file until it is no longer in the PROCESSING state.

    Args:
        uploaded_file: Object returned by ``upload_file``; its ``state.name``
            and ``name`` attributes are read.

    Returns:
        The most recently fetched file object.

    Raises:
        TimeoutError: Processing did not finish within
            ``PROCESSING_TIMEOUT_SECONDS``.
        RuntimeError: The file ended in the FAILED state.
    """
    deadline = time.monotonic() + PROCESSING_TIMEOUT_SECONDS
    while uploaded_file.state.name == "PROCESSING":
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Video processing did not finish within {PROCESSING_TIMEOUT_SECONDS} seconds"
            )
        time.sleep(PROCESSING_POLL_SECONDS)
        uploaded_file = get_file(uploaded_file.name)
    if uploaded_file.state.name == "FAILED":
        raise RuntimeError("Video processing failed")
    return uploaded_file


def _remove_directory(directory):
    """Delete ``directory`` and the files directly inside it."""
    for entry in os.listdir(directory):
        os.remove(os.path.join(directory, entry))
    os.rmdir(directory)


# URL input for TikTok video
video_url = st.text_input("Enter TikTok video URL")

if st.button('Generate the story'):
    # Treat whitespace-only input the same as no input.
    cleaned_url = (video_url or "").strip()
    if not cleaned_url:
        st.warning('PLEASE ENTER A VALID URL')
    else:
        # Each run gets its own directory so simultaneous runs never read,
        # overwrite, or delete one another's video file.
        work_dir = tempfile.mkdtemp(prefix="tiktok_video_")
        temp_video_path = os.path.join(work_dir, 'tiktok_video.mp4')
        success = False
        try:
            # Download the TikTok video
            with st.spinner("Downloading video..."):
                success, message = download_tiktok_video(cleaned_url, temp_video_path)

            if not success:
                st.error(f"Error downloading video: {message}")
            else:
                st.success(message)

                # Display the video
                st.video(temp_video_path, format="video/mp4", start_time=0)

                try:
                    with st.spinner("Generating the story of the video"):
                        # Upload the video and wait until it has been processed
                        processed_video = _wait_for_file_processing(
                            upload_file(temp_video_path)
                        )

                        # AI agent processing
                        response = multimodal_Agent.run(
                            ANALYSIS_PROMPT, videos=[processed_video]
                        )

                    st.subheader('Analysis Result')
                    st.markdown(response.content)

                except Exception as error:
                    st.error(f"An error occurred: {error}")
        finally:
            # Remove the run directory whether or not the download succeeded,
            # so partial downloads are cleaned up as well.
            try:
                _remove_directory(work_dir)
            except OSError as e:
                st.warning(f"Failed to delete video: {e}")
            else:
                if success:
                    st.info("Downloaded TikTok video deleted.")