# Source: Hugging Face Space awacke1/CodeCompetitionClaudeVsGPT (status: Running; file size: 13,315 bytes)

import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import torch
import json
import os
import glob
from pathlib import Path
from datetime import datetime
import edge_tts
import asyncio
import base64
import requests
import plotly.graph_objects as go
from gradio_client import Client
from collections import defaultdict
from bs4 import BeautifulSoup
from audio_recorder_streamlit import audio_recorder
import streamlit.components.v1 as components

# Page configuration (must run before any other Streamlit call renders output)
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto",
    page_title="Video Search & Research Assistant",
    page_icon="🎥",
)

# Seed session state: each key is created once and then survives reruns
for _state_key, _initial_value in (
    ('search_history', []),
    ('last_voice_input', ""),
    ('transcript_history', []),
    ('should_rerun', False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Custom styling injected as raw HTML/CSS
_CUSTOM_CSS = """
<style>
    .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
    .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
    .stButton>button { margin-right: 0.5rem; }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Custom speech-recognition component served from the local "mycomponent" directory
speech_component = components.declare_component("speech_recognition", path="mycomponent")

class VideoSearch:
    """Semantic search over a video dataset using precomputed embeddings.

    Construction loads the sentence-transformer text model and then the
    dataset (local JSON cache first, Hugging Face datasets-server second).
    When no usable dataset can be obtained, a small placeholder dataset is
    generated so that ``search`` keeps working.

    Attributes set by loading:
        dataset: ``pandas.DataFrame`` with one row per video clip.
        video_embeds: 2-D array of per-row video embeddings.
        text_embeds: 2-D array of per-row description embeddings.
    """

    # Width of randomly generated fallback embeddings. 384 is the value the
    # fallback has always used; presumably the text model's output size.
    # TODO confirm against the model card.
    FALLBACK_EMBED_DIM = 384

    def __init__(self):
        self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.load_dataset()

    @staticmethod
    def _flatten_rows(rows):
        """Unwrap datasets-server row envelopes into plain column dicts.

        The datasets-server wraps every record as
        ``{"row_idx": ..., "row": {...columns...}, "truncated_cells": [...]}``.
        Entries that are not wrapped this way are passed through unchanged,
        so already-flat data (e.g. a hand-written cache) is left alone.
        """
        return [
            entry['row']
            if isinstance(entry, dict) and isinstance(entry.get('row'), dict)
            else entry
            for entry in rows
        ]

    def fetch_dataset_rows(self):
        """Fetch dataset from Hugging Face API with debug and caching.

        Returns:
            A ``pandas.DataFrame`` of dataset rows, or ``None`` when neither
            the cache, the API, nor ``example_data.json`` yields data. All
            errors are reported through Streamlit rather than raised.
        """
        cache_file = "dataset_cache.json"
        try:
            # First try to load from local cache
            if os.path.exists(cache_file):
                st.info("Loading from cache...")
                with open(cache_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                # Caches written by earlier versions still hold the wrapped rows.
                if isinstance(data, list):
                    data = self._flatten_rows(data)
                return pd.DataFrame(data)

            st.info("Fetching from Hugging Face API...")
            url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"

            # Add debug output
            st.write(f"Requesting URL: {url}")

            response = requests.get(url, timeout=30)
            st.write(f"Response status: {response.status_code}")

            if response.status_code != 200:
                st.error(f"API request failed with status code: {response.status_code}")
                if response.status_code == 404:
                    st.error("Dataset not found - check the dataset name and configuration")
                try:
                    error_details = response.json()
                    st.write("Error details:", error_details)
                except ValueError:
                    # Body was not JSON; show it verbatim instead.
                    st.write("Raw response:", response.text)
                return None

            data = response.json()

            # Debug output
            st.write("Response structure:", list(data.keys()))

            if 'rows' in data:
                rows = self._flatten_rows(data['rows'])

                # Cache the (flattened) response
                with open(cache_file, 'w', encoding='utf-8') as f:
                    json.dump(rows, f)

                df = pd.DataFrame(rows)

                # Debug output
                st.write("DataFrame columns:", list(df.columns))
                st.write("Number of rows:", len(df))

                return df

            st.error("No 'rows' found in API response")
            st.write("API Response:", data)

            # Try loading example data
            example_file = "example_data.json"
            if os.path.exists(example_file):
                st.info("Loading example data...")
                with open(example_file, 'r', encoding='utf-8') as f:
                    example_data = json.load(f)
                return pd.DataFrame(example_data)

            return None

        except Exception as e:
            # Top-level boundary: the UI must keep running, so report and fall back.
            st.error(f"Error fetching dataset: {str(e)}")
            import traceback
            st.write("Traceback:", traceback.format_exc())
            return None

    def create_dummy_data(self, num_rows=5):
        """Install a small placeholder dataset with random embeddings.

        Used when the real dataset cannot be loaded. ``youtube_id`` is left
        empty so callers that embed a player only when an id is present skip it.

        Args:
            num_rows: Number of placeholder rows to generate.
        """
        rng = np.random.default_rng(0)  # fixed seed keeps results stable across reruns
        self.dataset = pd.DataFrame({
            'video_id': [f"dummy_{i}" for i in range(num_rows)],
            'youtube_id': [""] * num_rows,
            'description': [
                f"Placeholder video {i + 1} (dataset unavailable)"
                for i in range(num_rows)
            ],
            'start_time': [0] * num_rows,
            'end_time': [60] * num_rows,
            'views': [0] * num_rows,
        })
        shape = (num_rows, self.FALLBACK_EMBED_DIM)
        self.video_embeds = rng.standard_normal(shape)
        self.text_embeds = rng.standard_normal(shape)

    def load_dataset(self):
        """Load the dataset and its embeddings, falling back to dummy data."""
        try:
            self.dataset = self.fetch_dataset_rows()
            # An empty frame is as unusable as no frame: nothing to rank.
            if self.dataset is not None and len(self.dataset) > 0:
                self.prepare_features()
            else:
                self.create_dummy_data()
        except Exception as e:
            st.error(f"Error loading dataset: {e}")
            self.create_dummy_data()

    def prepare_features(self):
        """Build embedding matrices from the ``video_embed`` / ``description_embed`` columns.

        Cells may hold either JSON-encoded strings or already-decoded
        sequences. If the columns are missing or cannot be parsed, random
        embeddings are substituted and the error is shown in the UI.
        """
        def _decode(column):
            return np.array([json.loads(e) if isinstance(e, str) else e for e in column])

        try:
            self.video_embeds = _decode(self.dataset.video_embed)
            self.text_embeds = _decode(self.dataset.description_embed)
        except Exception as e:
            st.error(f"Error preparing features: {e}")
            num_rows = len(self.dataset)
            self.video_embeds = np.random.randn(num_rows, self.FALLBACK_EMBED_DIM)
            self.text_embeds = np.random.randn(num_rows, self.FALLBACK_EMBED_DIM)

    def _is_comparable(self, embeds, query_embedding):
        """Return True when *embeds* is a row-aligned matrix of the query's width."""
        return (
            getattr(embeds, 'ndim', 0) == 2
            and embeds.shape[0] == len(self.dataset)
            and embeds.shape[1] == query_embedding.shape[0]
        )

    def search(self, query, top_k=5):
        """Rank dataset rows by cosine similarity to *query*.

        The score is the mean of the video-embedding and text-embedding
        similarities (equal 0.5 / 0.5 weights when both are usable). An
        embedding matrix whose width differs from the query embedding is
        skipped instead of raising inside ``cosine_similarity``.

        Args:
            query: Free-text search string.
            top_k: Maximum number of results; values <= 0 yield no results.

        Returns:
            A list of result dicts (``video_id``, ``youtube_id``,
            ``description``, ``start_time``, ``end_time``,
            ``relevance_score``, ``views``), best match first.
        """
        top_k = int(top_k)
        # Guard first: ``[-0:]`` would otherwise select *every* row.
        if top_k <= 0 or self.dataset is None or len(self.dataset) == 0:
            return []

        query_embedding = np.asarray(self.text_model.encode([query])[0])

        similarities = [
            cosine_similarity([query_embedding], embeds)[0]
            for embeds in (self.video_embeds, self.text_embeds)
            if self._is_comparable(embeds, query_embedding)
        ]
        if not similarities:
            st.warning("Stored embeddings are not compatible with the query embedding.")
            return []

        combined_sims = np.mean(similarities, axis=0)
        top_indices = np.argsort(combined_sims)[-top_k:][::-1]

        results = []
        for idx in top_indices:
            row = self.dataset.iloc[idx]
            results.append({
                'video_id': row['video_id'],
                'youtube_id': row['youtube_id'],
                'description': row['description'],
                'start_time': row['start_time'],
                'end_time': row['end_time'],
                'relevance_score': float(combined_sims[idx]),
                'views': row['views'],
            })
        return results

def perform_arxiv_search(query, vocal_summary=True, extended_refs=False):
    """Perform Arxiv search with audio summaries.

    Queries the ``awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern`` Gradio
    space twice (reference retrieval, then an LLM answer), renders the
    combined markdown and optionally speaks the first 500 characters of the
    answer.

    Args:
        query: Research question to send to the space.
        vocal_summary: When true, synthesize and play an audio summary.
        extended_refs: Accepted for interface compatibility; currently unused.

    Returns:
        The rendered markdown string, or ``None`` if any step failed (the
        error is shown in the UI instead of being raised).
    """
    try:
        client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
        refs = client.predict(query, 20, "Semantic Search",
                              "mistralai/Mixtral-8x7B-Instruct-v0.1",
                              api_name="/update_with_rag_md")[0]
        response = client.predict(query, "mistralai/Mixtral-8x7B-Instruct-v0.1",
                                  True, api_name="/ask_llm")

        result = f"### 🔎 {query}\n\n{response}\n\n{refs}"
        st.markdown(result)

        if vocal_summary:
            # Coerce before slicing: the f-string above already tolerates a
            # non-str response, so the spoken summary should as well.
            spoken_text = str(response)[:500]
            audio_file = asyncio.run(generate_speech(spoken_text))
            if audio_file:
                try:
                    st.audio(audio_file)
                finally:
                    # Always drop the temp mp3, even if rendering failed.
                    os.remove(audio_file)

        return result
    except Exception as e:
        st.error(f"Error in Arxiv search: {e}")
        return None

async def generate_speech(text, voice="en-US-AriaNeural"):
    """Generate speech using Edge TTS.

    Args:
        text: Text to synthesize. ``None``, non-string values and
            blank/whitespace-only strings produce no audio.
        voice: Edge TTS voice name.

    Returns:
        Path of the written ``.mp3`` file in the working directory, or
        ``None`` when there was nothing to say or synthesis failed (the
        failure is reported through Streamlit). The caller owns the file and
        is responsible for deleting it.
    """
    if not isinstance(text, str) or not text.strip():
        return None

    # Microsecond resolution keeps two requests in the same second from
    # overwriting each other's file.
    audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.mp3"
    try:
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(audio_file)
        return audio_file
    except Exception as e:
        st.error(f"Error generating speech: {e}")
        # Do not leave a partially written file behind.
        try:
            if os.path.exists(audio_file):
                os.remove(audio_file)
        except OSError:
            pass  # best-effort cleanup; the original error was already reported
        return None

def process_audio_input(audio_bytes):
    """Handle a clip coming from the audio recorder widget.

    The bytes are written to a timestamped ``.wav`` file in the working
    directory, a success notice is shown, and the file is removed again.
    No transcription happens here yet.

    Returns:
        ``True`` when a non-empty recording was handled, ``False`` otherwise.
    """
    # Nothing recorded: bail out early.
    if not audio_bytes:
        return False

    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    wav_path = Path(f"temp_audio_{stamp}.wav")

    # Persist the clip temporarily
    wav_path.write_bytes(audio_bytes)

    # A speech-to-text service would normally be invoked at this point;
    # for now the recording is merely acknowledged.
    st.success("Audio recorded successfully!")

    # Remove the temporary file again
    if wav_path.exists():
        wav_path.unlink()

    return True

def main():
    """Render the Streamlit UI: video search, voice/audio input and Arxiv tabs.

    Streamlit re-executes this function on every widget interaction and a
    button only reports ``True`` during the single rerun caused by its click.
    Anything that must outlive one click (the search backend, the last
    result list, the last transcript) is therefore kept in
    ``st.session_state`` rather than in local variables.
    """
    st.title("🎥 Video Search & Research Assistant")

    # Initialize search once per session; rebuilding it on every rerun would
    # reload the embedding model and dataset for each click.
    if 'video_search' not in st.session_state:
        st.session_state['video_search'] = VideoSearch()
    search = st.session_state['video_search']

    # Voice picked in the sidebar on a previous run (the selectbox itself is
    # rendered at the end of this function, hence the default on first run).
    tts_voice = st.session_state.get('tts_voice', "en-US-AriaNeural")

    # Create main tabs
    tab1, tab2, tab3 = st.tabs(["🔍 Video Search", "🎙️ Voice & Audio", "📚 Arxiv Research"])

    with tab1:
        st.subheader("Search Video Dataset")

        # Text search
        query = st.text_input("Enter your search query:")
        col1, col2 = st.columns(2)

        with col1:
            search_button = st.button("🔍 Search")
        with col2:
            num_results = st.slider("Number of results:", 1, 10, 5)

        if search_button and query:
            results = search.search(query, num_results)
            st.session_state['search_history'].append({
                'query': query,
                'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'results': results
            })
            # Keep the results so they are still rendered on the rerun that a
            # nested "Generate Audio Summary" click triggers.
            st.session_state['last_results'] = results

        for i, result in enumerate(st.session_state.get('last_results', []), 1):
            # Dataset cells are not guaranteed to be strings/numbers.
            raw_description = result['description']
            description = "" if raw_description is None else str(raw_description)
            views = result['views']
            if isinstance(views, (int, float, np.integer, np.floating)):
                views_text = f"{views:,}"
            else:
                views_text = str(views)

            with st.expander(f"Result {i}: {description[:100]}...", expanded=i == 1):
                cols = st.columns([2, 1])

                with cols[0]:
                    st.markdown("**Full Description:**")
                    st.write(description)
                    st.markdown(f"**Time Range:** {result['start_time']}s - {result['end_time']}s")
                    st.markdown(f"**Views:** {views_text}")

                with cols[1]:
                    st.markdown(f"**Relevance Score:** {result['relevance_score']:.2%}")
                    if result['youtube_id']:
                        st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")

                    # Generate audio summary
                    if st.button("🔊 Generate Audio Summary", key=f"audio_{i}"):
                        summary = f"Video summary: {description[:200]}"
                        audio_file = asyncio.run(generate_speech(summary, tts_voice))
                        if audio_file:
                            try:
                                st.audio(audio_file)
                            finally:
                                # Remove the temp file even if rendering failed.
                                os.remove(audio_file)

    with tab2:
        st.subheader("Voice Input & Audio Recording")

        col1, col2 = st.columns(2)
        with col1:
            st.write("🎙️ Speech Recognition")
            voice_input = speech_component()

            if voice_input and voice_input != st.session_state['last_voice_input']:
                st.session_state['last_voice_input'] = voice_input

            # Render from the stored transcript, not from "is this input new?":
            # on the rerun caused by clicking the search button the input is
            # no longer new, and the button would otherwise never be reached.
            transcript = st.session_state['last_voice_input']
            if transcript:
                st.markdown("**Transcribed Text:**")
                st.write(transcript)

                if st.button("🔍 Search Videos"):
                    results = search.search(transcript, num_results)
                    for i, result in enumerate(results, 1):
                        with st.expander(f"Result {i}", expanded=i == 1):
                            st.write(result['description'])
                            if result['youtube_id']:
                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")

        with col2:
            st.write("🎵 Audio Recorder")
            audio_bytes = audio_recorder()
            if audio_bytes:
                process_audio_input(audio_bytes)

    with tab3:
        st.subheader("Arxiv Research")
        arxiv_query = st.text_input("🔍 Research Query:")

        col1, col2 = st.columns(2)
        with col1:
            vocal_summary = st.checkbox("Generate Audio Summary", value=True)
        with col2:
            extended_refs = st.checkbox("Include Extended References", value=False)

        if st.button("🔍 Search Arxiv") and arxiv_query:
            perform_arxiv_search(arxiv_query, vocal_summary, extended_refs)

    # Sidebar for history and settings
    with st.sidebar:
        st.subheader("⚙️ Settings & History")

        if st.button("🗑️ Clear History"):
            st.session_state['search_history'] = []
            # ``st.rerun`` replaced ``st.experimental_rerun``; support both so
            # the app runs on old and new Streamlit releases.
            rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun")
            rerun()

        st.markdown("### Recent Searches")
        for entry in reversed(st.session_state['search_history'][-5:]):
            st.markdown(f"**{entry['timestamp']}**: {entry['query']}")

        st.markdown("### Voice Settings")
        st.selectbox("TTS Voice:",
                     ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
                     key="tts_voice")


if __name__ == "__main__":
    main()