Spaces:

awacke1
/

CodeCompetitionClaudeVsGPT

Running

App Files Files Community

CodeCompetitionClaudeVsGPT / app.py

awacke1

Update app.py

cc67713 verified about 2 months ago

raw

history blame

11.1 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity
	import torch
	import json
	import os
	from pathlib import Path
	from datetime import datetime
	import edge_tts
	import asyncio
	import base64
	import streamlit.components.v1 as components

	# Page configuration: tab title, tab icon and a full-width layout.
	st.set_page_config(page_title="Video Search with Speech", page_icon="🎥", layout="wide")

	# Seed the per-session values the rest of the app reads; values that are
	# already present in the session are left untouched.
	for _state_key, _initial_value in (('search_history', []), ('last_voice_input', "")):
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _initial_value

	# Speech-recognition custom component, declared from the "mycomponent" path.
	speech_component = components.declare_component("speech_recognition", path="mycomponent")

	class VideoSearch:
	    """Semantic video search over a sample of the Omega Multimodal dataset.

	    Constructing an instance loads the sentence-embedding model and then
	    tries to fetch dataset rows from the Hugging Face datasets server. When
	    no rows can be loaded, a small dummy dataset with random embeddings is
	    used instead so that ``search`` still works.

	    Attributes populated by ``load_dataset``:
	        dataset: ``pandas.DataFrame`` with one row per video.
	        video_embeds: 2-D array, one video embedding per dataset row.
	        text_embeds: 2-D array, one description embedding per dataset row.
	    """

	    # Seconds to wait for the datasets server before giving up.
	    REQUEST_TIMEOUT = 30
	    # Width of the random fallback embeddings (chosen to match the text model).
	    FALLBACK_EMBED_DIM = 384

	    def __init__(self):
	        self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
	        self.load_dataset()

	    @classmethod
	    def _get_json(cls, url, what):
	        """GET *url* and return the decoded JSON body, or ``None`` on failure.

	        Any failure (network error, non-200 status, invalid JSON) is reported
	        with ``st.error`` using *what* as the label, never raised.
	        """
	        import requests

	        try:
	            response = requests.get(url, timeout=cls.REQUEST_TIMEOUT)
	        except requests.RequestException as exc:
	            st.error(f"Error fetching {what}: {exc}")
	            return None

	        if response.status_code != 200:
	            st.error(f"Error fetching {what}: {response.status_code}")
	            return None

	        try:
	            return response.json()
	        except ValueError as exc:
	            st.error(f"Error fetching {what}: {exc}")
	            return None

	    @staticmethod
	    def _rows_to_frame(rows):
	        """Build a DataFrame from the ``rows`` list of a first-rows response.

	        Each entry wraps the actual record under a ``"row"`` key (next to
	        ``row_idx`` / ``truncated_cells``); the record is unwrapped so the
	        frame's columns are the dataset's columns. Entries without a
	        ``"row"`` dict are used as they are.
	        """
	        records = [
	            entry['row']
	            if isinstance(entry, dict) and isinstance(entry.get('row'), dict)
	            else entry
	            for entry in rows
	        ]
	        return pd.DataFrame(records)

	    @staticmethod
	    def _parse_embeddings(values):
	        """Turn a column of embeddings (lists or JSON strings) into a 2-D float array.

	        Raises:
	            ValueError / TypeError: if a cell is not valid JSON, is not
	                numeric, or the rows do not form a rectangular matrix.
	        """
	        matrix = np.array(
	            [json.loads(cell) if isinstance(cell, str) else cell for cell in values],
	            dtype=float,
	        )
	        if matrix.ndim != 2:
	            raise ValueError(f"expected a 2-D embedding matrix, got {matrix.ndim}-D")
	        return matrix

	    @staticmethod
	    def _native(value):
	        """Convert a NumPy scalar to the equivalent plain Python value.

	        Search results end up in the session history, which is serialised
	        with ``json.dumps``; NumPy scalars are not JSON-serialisable.
	        """
	        return value.item() if isinstance(value, np.generic) else value

	    def fetch_dataset_rows(self):
	        """Fetch dataset from Hugging Face API

	        Returns:
	            A DataFrame with one row per dataset record, or ``None`` when the
	            request failed or returned no rows.
	        """
	        # Fetch first rows from the dataset
	        url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
	        data = self._get_json(url, "dataset")
	        if data is None:
	            return None

	        rows = data.get('rows', [])
	        if not rows:
	            # An empty frame would break search(); let the caller fall back.
	            st.error("Error fetching dataset: no rows returned")
	            return None
	        return self._rows_to_frame(rows)

	    def get_dataset_splits(self):
	        """Get available dataset splits

	        Returns:
	            The decoded JSON response, or ``None`` when the request failed.
	        """
	        url = "https://datasets-server.huggingface.co/splits?dataset=omegalabsinc%2Fomega-multimodal"
	        return self._get_json(url, "splits")

	    def load_dataset(self):
	        """Load the Omega Multimodal dataset

	        Falls back to ``create_dummy_data`` when the rows cannot be fetched or
	        anything unexpected goes wrong while preparing them.
	        """
	        try:
	            # Fetch dataset from Hugging Face API
	            self.dataset = self.fetch_dataset_rows()

	            if self.dataset is not None:
	                # Splits are informational only; a failed request is
	                # reported by get_dataset_splits and otherwise ignored.
	                splits_info = self.get_dataset_splits()
	                if splits_info:
	                    st.sidebar.write("Available splits:", splits_info)

	                self.prepare_features()
	            else:
	                self.create_dummy_data()

	        except Exception as e:
	            st.error(f"Error loading dataset: {e}")
	            self.create_dummy_data()

	    def prepare_features(self):
	        """Prepare and cache embeddings

	        Reads the ``video_embed`` and ``description_embed`` columns of
	        ``self.dataset``. If either is missing or malformed, the error is
	        shown and random embeddings are used so the app keeps running.
	        """
	        try:
	            video_embeds = self._parse_embeddings(self.dataset['video_embed'])
	            text_embeds = self._parse_embeddings(self.dataset['description_embed'])
	        except (KeyError, AttributeError, TypeError, ValueError) as e:
	            st.error(f"Error preparing features: {e}")
	            # Create random embeddings as fallback
	            num_rows = len(self.dataset)
	            video_embeds = np.random.randn(num_rows, self.FALLBACK_EMBED_DIM)
	            text_embeds = np.random.randn(num_rows, self.FALLBACK_EMBED_DIM)

	        # Assign both together so the two matrices never come from different sources.
	        self.video_embeds = video_embeds
	        self.text_embeds = text_embeds

	    def create_dummy_data(self):
	        """Create dummy data for testing"""
	        self.dataset = pd.DataFrame({
	            'video_id': [f'video_{i}' for i in range(10)],
	            'youtube_id': ['dQw4w9WgXcQ'] * 10,
	            'description': ['Sample video description'] * 10,
	            'views': [1000] * 10,
	            'start_time': [0] * 10,
	            'end_time': [60] * 10,
	        })
	        # Create dummy embeddings
	        self.video_embeds = np.random.randn(10, self.FALLBACK_EMBED_DIM)  # Match model dimensions
	        self.text_embeds = np.random.randn(10, self.FALLBACK_EMBED_DIM)

	    def search(self, query, top_k=5):
	        """Search videos using query

	        Args:
	            query: Free-text query, embedded with ``self.text_model``.
	            top_k: Maximum number of results to return.

	        Returns:
	            A list of result dicts (``video_id``, ``youtube_id``,
	            ``description``, ``start_time``, ``end_time``,
	            ``relevance_score``, ``views``) ordered from most to least
	            relevant. Values are plain Python objects. The list is empty when
	            ``top_k`` is not positive or the dataset has no rows.
	        """
	        # A slice of [-0:] would select every row, so handle 0 explicitly.
	        if top_k <= 0 or len(self.dataset) == 0:
	            return []

	        query_embedding = self.text_model.encode([query])[0]

	        # Compute similarities
	        # NOTE(review): cosine_similarity requires the stored embeddings to
	        # have the same width as the query embedding - confirm that the
	        # dataset's embeddings are compatible with the text model.
	        video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
	        text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]

	        # Combine similarities (equal weight for video and description)
	        combined_sims = 0.5 * video_sims + 0.5 * text_sims

	        # Get top results, best first
	        top_indices = np.argsort(combined_sims)[-top_k:][::-1]

	        fields = ('video_id', 'youtube_id', 'description', 'start_time', 'end_time')
	        results = []
	        for idx in top_indices:
	            row = self.dataset.iloc[idx]
	            entry = {name: self._native(row[name]) for name in fields}
	            entry['relevance_score'] = float(combined_sims[idx])
	            entry['views'] = self._native(row['views'])
	            results.append(entry)

	        return results

	async def generate_speech(text, voice="en-US-AriaNeural"):
	    """Generate speech using Edge TTS

	    Args:
	        text: Text to speak. ``None``, empty and whitespace-only text are
	            skipped.
	        voice: Edge TTS voice name.

	    Returns:
	        The (relative) path of the MP3 file that was written, or ``None``
	        when there was nothing to speak. The caller is responsible for
	        deleting the file.
	    """
	    import uuid

	    if not text or not text.strip():
	        return None

	    communicate = edge_tts.Communicate(text, voice)
	    # The timestamp alone only has one-second resolution; the random suffix
	    # keeps concurrent sessions from writing to the same file.
	    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
	    audio_file = f"speech_{stamp}_{uuid.uuid4().hex[:8]}.mp3"
	    try:
	        await communicate.save(audio_file)
	    except BaseException:
	        # Do not leave a partial file behind; the error itself is re-raised.
	        if os.path.exists(audio_file):
	            os.remove(audio_file)
	        raise
	    return audio_file

	def _json_default(value):
	    """``json.dumps`` fallback used for the history download.

	    NumPy scalars and arrays are converted with ``tolist()``; anything else
	    is deliberately stringified so the download never fails.
	    """
	    if hasattr(value, "tolist"):
	        return value.tolist()
	    return str(value)


	def _record_search(query, results):
	    """Append one search (query, timestamp, results) to the session history."""
	    st.session_state['search_history'].append({
	        'query': query,
	        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
	        'results': results,
	    })


	def _show_video(result):
	    """Embed the result's YouTube video when it has a YouTube id."""
	    if result['youtube_id']:
	        st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")


	def main():
	    """Render the app: text search, voice search, history and sidebar.

	    Streamlit re-executes this function on every widget interaction and a
	    button is only truthy on the run right after its click. Search results
	    are therefore kept in ``st.session_state`` and drawn from there, so that
	    widgets nested inside the results (the audio button) keep working.
	    """
	    st.title("🎥 Video Search with Speech Recognition")

	    # Initialize video search once per session. Building it loads the model
	    # and fetches the dataset, which is too slow to repeat on every rerun.
	    # Messages emitted while building it only appear on that first run.
	    if 'video_search' not in st.session_state:
	        st.session_state['video_search'] = VideoSearch()
	    search = st.session_state['video_search']

	    # Create tabs
	    tab1, tab2, tab3 = st.tabs(["🔍 Search", "🎙️ Voice Input", "💾 History"])

	    with tab1:
	        st.subheader("Search Videos")

	        # Text search
	        query = st.text_input("Enter your search query:")
	        col1, col2 = st.columns(2)

	        with col1:
	            search_button = st.button("🔍 Search")
	        with col2:
	            num_results = st.slider("Number of results:", 1, 10, 5)

	        if search_button and query:
	            results = search.search(query, num_results)
	            _record_search(query, results)
	            st.session_state['text_results'] = results

	        # Draw from session state so the results survive the rerun that a
	        # click on "Generate Audio Summary" triggers.
	        for i, result in enumerate(st.session_state.get('text_results', []), 1):
	            with st.expander(f"Result {i}: {result['description'][:100]}...", expanded=i == 1):
	                cols = st.columns([2, 1])

	                with cols[0]:
	                    st.markdown("Full Description:")
	                    st.write(result['description'])
	                    st.markdown(f"Time Range: {result['start_time']}s - {result['end_time']}s")
	                    st.markdown(f"Views: {result['views']:,}")

	                with cols[1]:
	                    st.markdown(f"Relevance Score: {result['relevance_score']:.2%}")
	                    _show_video(result)

	                # Generate audio summary
	                if st.button("🔊 Generate Audio Summary", key=f"audio_{i}"):
	                    summary = f"Video summary: {result['description'][:200]}"
	                    # The voice selectbox lives in the sidebar (key "tts_voice").
	                    voice = st.session_state.get('tts_voice', "en-US-AriaNeural")
	                    audio_file = asyncio.run(generate_speech(summary, voice))
	                    if audio_file:
	                        try:
	                            st.audio(audio_file)
	                        finally:
	                            # Cleanup audio file
	                            if os.path.exists(audio_file):
	                                os.remove(audio_file)

	    with tab2:
	        st.subheader("Voice Input")

	        # Speech recognition component
	        voice_input = speech_component()

	        if voice_input:
	            if voice_input != st.session_state['last_voice_input']:
	                # New transcript: remember it and drop results of the old one.
	                st.session_state['last_voice_input'] = voice_input
	                st.session_state['voice_results'] = []

	            # Shown on every run (not only when the transcript changed), so
	            # the search button is still present on the rerun its click causes.
	            st.markdown("Transcribed Text:")
	            st.write(voice_input)

	            if st.button("🔍 Search Videos"):
	                results = search.search(voice_input, num_results)
	                _record_search(voice_input, results)
	                st.session_state['voice_results'] = results

	        for i, result in enumerate(st.session_state.get('voice_results', []), 1):
	            with st.expander(f"Result {i}: {result['description'][:100]}...", expanded=i == 1):
	                st.write(result['description'])
	                _show_video(result)

	    with tab3:
	        st.subheader("Search History")

	        if st.button("🗑️ Clear History"):
	            st.session_state['search_history'] = []
	            # st.experimental_rerun is deprecated in favour of st.rerun;
	            # use whichever the installed Streamlit provides.
	            rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
	            if rerun is not None:
	                rerun()

	        for entry in reversed(st.session_state['search_history']):
	            with st.expander(f"Query: {entry['query']} ({entry['timestamp']})", expanded=False):
	                st.markdown(f"Original Query: {entry['query']}")
	                st.markdown(f"Time: {entry['timestamp']}")

	                for j, result in enumerate(entry['results'], 1):
	                    st.markdown(f"Result {j}:")
	                    st.write(result['description'])
	                    _show_video(result)

	    # Sidebar configuration
	    with st.sidebar:
	        st.subheader("⚙️ Configuration")
	        st.markdown("Video Search Settings")
	        # NOTE(review): "default_results" is not read anywhere in this function.
	        st.slider("Default Results:", 1, 10, 5, key="default_results")

	        st.markdown("Voice Settings")
	        st.selectbox("TTS Voice:",
	                     ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
	                     key="tts_voice")

	        st.markdown("Model Settings")
	        # NOTE(review): "embedding_model" is not read anywhere in this function.
	        st.selectbox("Text Embedding Model:",
	                     ["all-MiniLM-L6-v2", "paraphrase-multilingual-MiniLM-L12-v2"],
	                     key="embedding_model")

	        if st.button("📥 Download Search History"):
	            # Convert history to JSON; results may hold NumPy values, which
	            # json cannot encode without the default hook.
	            history_json = json.dumps(st.session_state['search_history'], indent=2,
	                                      default=_json_default)
	            b64 = base64.b64encode(history_json.encode()).decode()
	            href = f'<a href="data:file/json;base64,{b64}" download="search_history.json">Download JSON</a>'
	            st.markdown(href, unsafe_allow_html=True)

	# Script entry point: call main() only when this file is run directly.
	if __name__ == "__main__":
	main()