import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

class VideoRetrieval:
    def __init__(self, use_dummy_data=True):
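        # 'all-MiniLM-L6-v2' produces 384-dimensional sentence embeddings; the
        # weights are downloaded from the Hugging Face Hub on first use. In a
        # Streamlit app, loading the model inside a function wrapped with
        # @st.cache_resource would avoid reloading it on every rerun (an
        # optional optimization, not done here).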
        self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
        if use_dummy_data:
            self.create_dummy_data()
        else:
            self.load_data()
        
    def create_dummy_data(self):
        """Create dummy features and metadata for demonstration"""
        # Create dummy features
        n_clips = 20
        feature_dim = 384  # matching the dimension of all-MiniLM-L6-v2
        
        self.features = {
            'visual_features': np.random.randn(n_clips, feature_dim),
            'scene_features': np.random.randn(n_clips, feature_dim),
            'object_features': np.random.randn(n_clips, feature_dim)
        }
        
        # Create dummy metadata
        movie_titles = [
            "The Matrix", "Inception", "The Dark Knight", "Pulp Fiction",
            "The Shawshank Redemption", "Forrest Gump", "The Godfather",
            "Fight Club", "Interstellar", "The Silence of the Lambs"
        ]
        
        descriptions = [
            "A dramatic confrontation in a dark room where the truth is revealed",
            "A high-stakes chase through a crowded city street",
            "An emotional reunion between long-lost friends",
            "A tense negotiation that determines the fate of many",
            "A quiet moment of reflection before a life-changing decision"
        ]
        
        # Sample YouTube clips (famous movie scenes)
        youtube_clips = [
            "https://www.youtube.com/watch?v=kcsNbQRU5TI",  # Matrix - Red Pill Blue Pill
            "https://www.youtube.com/watch?v=YoHD9XEInc0",  # Inception - Hallway Fight
            "https://www.youtube.com/watch?v=ZWCAf-xLV2k",  # Dark Knight - Interrogation
            "https://www.youtube.com/watch?v=Jomr9SAjcyw",  # Pulp Fiction - Restaurant
            "https://www.youtube.com/watch?v=SQ7_5MMbPYs",  # Shawshank - Hope Speech
        ]
        
        data = []
        for i in range(n_clips):
            data.append({
                'clip_id': f'clip_{i}',
                'movie_title': movie_titles[i % len(movie_titles)],
                'description': descriptions[i % len(descriptions)],
                'timestamp': f'{(i*5):02d}:00 - {(i*5+3):02d}:00',
                'duration': '3:00',
                'youtube_url': youtube_clips[i % len(youtube_clips)]
            })
        
        self.clips_df = pd.DataFrame(data)
        
    def load_data(self):
        """Load actual pre-computed features and metadata"""
        try:
            self.features = {
                'visual_features': np.load('path_to_visual_features.npy'),
                'scene_features': np.load('path_to_scene_features.npy'),
                'object_features': np.load('path_to_object_features.npy')
            }
            self.clips_df = pd.read_csv('clips_metadata.csv')
        except FileNotFoundError as e:
            st.error(f"Error loading data: {e}. Falling back to dummy data.")
            self.create_dummy_data()
    
    def encode_query(self, query_text):
        """Encode the text query into embeddings"""
        return self.text_model.encode(query_text)
    
    def compute_similarity(self, query_embedding, feature_type='visual_features'):
        """Compute similarity between query and video features"""
        similarities = cosine_similarity(
            query_embedding.reshape(1, -1),
            self.features[feature_type]
        )
        return similarities[0]
    
    def retrieve_clips(self, query_text, top_k=3):
        """Retrieve top-k most relevant clips based on query"""
        # Encode query
        query_embedding = self.encode_query(query_text)
        
        # Compute similarities for different feature types
        similarities = {}
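        # Heuristic fusion weights (they sum to 1.0); with real features these
        # would be tuned on held-out data rather than fixed by hand.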
        weights = {
            'visual_features': 0.4,
            'scene_features': 0.3,
            'object_features': 0.3
        }
        
        for feat_type, weight in weights.items():
            similarities[feat_type] = self.compute_similarity(query_embedding, feat_type) * weight
            
        # Combine similarities
        combined_similarities = sum(similarities.values())
        
        # Get indices of the top-k scores (argsort sorts ascending, so take the last k and reverse)
        top_indices = np.argsort(combined_similarities)[-top_k:][::-1]
        
        # Return clip information
        results = []
        for idx in top_indices:
            results.append({
                'clip_id': self.clips_df.iloc[idx]['clip_id'],
                'movie_title': self.clips_df.iloc[idx]['movie_title'],
                'description': self.clips_df.iloc[idx]['description'],
                'timestamp': self.clips_df.iloc[idx]['timestamp'],
                'youtube_url': self.clips_df.iloc[idx]['youtube_url'],
                'similarity_score': float(combined_similarities[idx])  # Convert to float for JSON serialization
            })
        
        return results
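
# Minimal standalone usage sketch (no Streamlit UI), assuming dummy data; the
# query string is illustrative only:
#
#   retrieval = VideoRetrieval(use_dummy_data=True)
#   for hit in retrieval.retrieve_clips("a tense chase scene", top_k=3):
#       print(hit['movie_title'], hit['similarity_score'])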

def main():
    st.set_page_config(
        page_title="Movie Scene Retrieval System",
        page_icon="🎬",
        layout="wide"
    )
    
    st.title("🎬 Movie Scene Retrieval System")
    st.write("""
    Search for movie scenes using natural language descriptions.
    The system will retrieve the most relevant 2-3 minute clips based on your query.
    
    *Note: This is a demo version using simulated data.*
    """)
    
    # Create the retrieval system once per session and reuse it across reruns
    if 'retrieval_system' not in st.session_state:
        st.session_state.retrieval_system = VideoRetrieval(use_dummy_data=True)
    retrieval_system = st.session_state.retrieval_system
    
    # Search interface
    col1, col2 = st.columns([3, 1])
    
    with col1:
        query = st.text_input(
            "Enter your scene description:",
            placeholder="e.g., A dramatic confrontation between two characters in a dark room"
        )
    
    with col2:
        num_results = st.slider("Number of results:", min_value=1, max_value=5, value=3)
    
    if st.button("πŸ” Search", type="primary"):
        if not query:
            st.warning("Please enter a scene description.")
        else:
            with st.spinner("Searching for relevant clips..."):
                results = retrieval_system.retrieve_clips(query, top_k=num_results)
                
                for i, result in enumerate(results, 1):
                    with st.container():
                        st.subheader(f"{result['movie_title']}")
                        cols = st.columns([2, 1])
                        
                        with cols[0]:
                            st.markdown(f"**Scene Description:**")
                            st.write(result['description'])
                            st.text(f"⏱️ Timestamp: {result['timestamp']}")
                            
                            # Add video player
                            if result['youtube_url']:
                                st.video(result['youtube_url'])
                        
                        with cols[1]:
                            st.markdown("**Relevance Score:**")
                            score = min(1.0, max(0.0, result['similarity_score']))
                            st.progress(score)
                            st.text(f"{score:.2%} match")
                            
                            # Add direct YouTube link
                            st.markdown(f"[πŸ”— Watch on YouTube]({result['youtube_url']})")
                            st.text("Click to open in a new tab")
                        
                        st.divider()
    
    # Sidebar with additional information
    with st.sidebar:
        st.header("ℹ️ About")
        st.write("""
        This demo system simulates a video retrieval engine that uses:
        
        - 🎥 Visual scene understanding
        - 👥 Character interaction analysis
        - 🎯 Object detection
        - 🎭 Action recognition
        
        In a production system, these features would be pre-computed
        from actual movie clips using state-of-the-art AI models.
        """)
        
        st.header("βš™οΈ Feature Weights")
        st.write("Current weights used for similarity computation:")
        st.write("- 🎬 Visual Features: 40%")
        st.write("- 🏞️ Scene Features: 30%")
        st.write("- πŸ“¦ Object Features: 30%")

if __name__ == "__main__":
    main()
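
# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py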