awacke1 committed on
Commit
90807c0
·
verified ·
1 Parent(s): 0108f8c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +276 -0
app.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import torch
7
+ import json
8
+ import os
9
+ from pathlib import Path
10
+ from datetime import datetime
11
+ import edge_tts
12
+ import asyncio
13
+ import base64
14
+ from openai import OpenAI
15
+ import anthropic
16
+ import streamlit.components.v1 as components
17
+
18
# Page configuration. The icon is a real emoji character (movie camera);
# the previous value was a mis-encoded byte sequence, not an emoji.
st.set_page_config(
    page_title="Video Search with Speech",
    page_icon="🎥",
    layout="wide",
)

# Initialize session state: seed each key only when it is absent so that
# values written during earlier script runs are kept.
for _key, _default in (
    ("messages", []),
    ("search_history", []),
    ("last_voice_input", ""),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default


def _create_client(factory, label):
    """Build an API client, returning None instead of raising on failure.

    Args:
        factory: Zero-argument callable that constructs the client.
        label: Human-readable provider name used in the warning message.

    Returns:
        The constructed client, or None when construction raised.
    """
    try:
        return factory()
    except Exception as exc:
        # NOTE(review): presumably the usual cause is a missing API key in
        # the environment - confirm against the SDK docs. Video search does
        # not need these clients, so the app keeps running without them.
        st.warning(f"{label} client unavailable: {exc}")
        return None


# API clients are constructed with no explicit arguments, so whatever
# configuration they need comes from the SDKs' own defaults.
openai_client = _create_client(OpenAI, "OpenAI")
claude_client = _create_client(anthropic.Anthropic, "Claude")

# Initialize the speech component (front-end assets live in ./mycomponent)
speech_component = components.declare_component("speech_recognition", path="mycomponent")
39
+
40
class VideoSearch:
    """Similarity search over a table of videos with precomputed embeddings.

    Attributes set by the constructor and the loaders:
        text_model: SentenceTransformer('all-MiniLM-L6-v2'), used to embed queries.
        dataset: pandas DataFrame with one row per video.
        video_embeds: NumPy array built from the ``video_embed`` column.
        text_embeds: NumPy array built from the ``description_embed`` column.
    """

    def __init__(self):
        self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.load_dataset()

    def load_dataset(self):
        """Read the dataset from the local CSV file ``paste.txt``.

        If reading the file or preparing the embeddings raises, the error is
        shown with ``st.error`` and dummy data is used instead, so the
        instance is always left in a usable state.
        """
        try:
            self.dataset = pd.read_csv("paste.txt")
            self.prepare_features()
        except Exception as e:
            st.error(f"Error loading dataset: {e}")
            self.create_dummy_data()

    def prepare_features(self):
        """Build the embedding arrays from the dataset's embedding columns."""
        self.video_embeds = self._parse_embeddings(self.dataset.video_embed)
        self.text_embeds = self._parse_embeddings(self.dataset.description_embed)

    @staticmethod
    def _parse_embeddings(column):
        """Return a NumPy array from a column of embeddings.

        String cells are decoded with ``json.loads`` (a CSV round-trip turns
        lists into their text form); any other cell is used unchanged.
        """
        return np.array([json.loads(e) if isinstance(e, str) else e for e in column])

    def create_dummy_data(self):
        """Create ten placeholder videos with random embeddings for testing."""
        self.dataset = pd.DataFrame({
            'video_id': [f'video_{i}' for i in range(10)],
            'youtube_id': ['dQw4w9WgXcQ'] * 10,  # Example YouTube ID
            'description': ['Sample video description'] * 10,
            'views': [1000] * 10,
            'start_time': [0] * 10,
            'end_time': [60] * 10
        })
        # Create dummy embeddings; 384 columns to match the model dimensions
        self.video_embeds = np.random.randn(10, 384)
        self.text_embeds = np.random.randn(10, 384)

    @staticmethod
    def _to_native(value):
        """Convert a NumPy scalar to the equivalent built-in Python value."""
        return value.item() if isinstance(value, np.generic) else value

    @staticmethod
    def _build_result(row, score):
        """Turn one dataset row and its score into a result dictionary.

        Values are converted to built-in Python types so that the result can
        be passed to ``json.dumps`` (NumPy scalars such as ``int64`` cannot).
        """
        native = VideoSearch._to_native
        return {
            'video_id': native(row['video_id']),
            'youtube_id': native(row['youtube_id']),
            'description': native(row['description']),
            'start_time': native(row['start_time']),
            'end_time': native(row['end_time']),
            'relevance_score': float(score),
            'views': native(row['views']),
        }

    def search(self, query: str, top_k: int = 5):
        """Return the ``top_k`` videos most similar to ``query``.

        The query embedding is compared against both the video and the
        description embeddings with cosine similarity; the two scores are
        averaged with equal weight.

        Args:
            query: Free-text search query.
            top_k: Maximum number of results. Zero or a negative number
                yields an empty list.

        Returns:
            A list of result dictionaries ordered from most to least
            relevant, with keys ``video_id``, ``youtube_id``,
            ``description``, ``start_time``, ``end_time``,
            ``relevance_score`` and ``views``.
        """
        top_k = int(top_k)
        if top_k <= 0:
            # A slice of [-0:] would select every row rather than none.
            return []

        query_embedding = self.text_model.encode([query])[0]

        # Compute similarities
        video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
        text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]

        # Combine similarities
        combined_sims = 0.5 * video_sims + 0.5 * text_sims

        # argsort is ascending: take the last top_k entries, best first
        top_indices = np.argsort(combined_sims)[-top_k:][::-1]

        return [
            self._build_result(self.dataset.iloc[idx], combined_sims[idx])
            for idx in top_indices
        ]
104
+
105
async def generate_speech(text, voice="en-US-AriaNeural"):
    """Synthesize ``text`` to an MP3 file with Edge TTS.

    Args:
        text: Text to speak.
        voice: Edge TTS voice name.

    Returns:
        The name of the MP3 file that was written, or None when ``text``
        is empty or contains only whitespace.
    """
    if text.strip():
        tts = edge_tts.Communicate(text, voice)
        # File name carries a timestamp with one-second resolution
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        audio_file = f"speech_{stamp}.mp3"
        await tts.save(audio_file)
        return audio_file

    # Nothing to say
    return None
114
+
115
def process_with_gpt4(prompt):
    """Send ``prompt`` to GPT-4 as a single user message.

    Returns:
        The text content of the first choice in the response, or None when
        the call raised (the error is shown with ``st.error``).
    """
    user_message = {"role": "user", "content": prompt}
    try:
        completion = openai_client.chat.completions.create(
            model="gpt-4",
            messages=[user_message],
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        st.error(f"Error with GPT-4: {exc}")
    return None
126
+
127
def process_with_claude(prompt):
    """Send ``prompt`` to Claude as a single user message.

    The request is capped at 1000 output tokens.

    Returns:
        The text of the first content item in the response, or None when
        the call raised (the error is shown with ``st.error``).
    """
    user_message = {"role": "user", "content": prompt}
    try:
        reply = claude_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[user_message],
        )
        first_item = reply.content[0]
        return first_item.text
    except Exception as exc:
        st.error(f"Error with Claude: {exc}")
    return None
139
+
140
def _to_seconds(value):
    """Coerce a clip offset to a non-negative whole number of seconds (0 if unusable)."""
    try:
        return max(0, int(float(value)))
    except (TypeError, ValueError, OverflowError):
        return 0


def _json_default(value):
    """``json.dumps`` fallback that converts NumPy scalars and arrays."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def _rerun():
    """Restart the script run, preferring ``st.rerun`` when this Streamlit has it."""
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()


def _show_video(result):
    """Embed the result's YouTube video, starting at its ``start_time``."""
    if result['youtube_id']:
        # Pass the offset through st.video's start_time argument instead of
        # relying on a `&t=` URL parameter.
        st.video(
            f"https://youtube.com/watch?v={result['youtube_id']}",
            start_time=_to_seconds(result['start_time']),
        )


def _run_search(search, query, num_results):
    """Run a search, append it to the session's search history, and return the results."""
    results = search.search(query, num_results)
    st.session_state['search_history'].append({
        'query': query,
        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        'results': results,
    })
    return results


def _play_audio_summary(result):
    """Speak the first 200 characters of the description with the selected TTS voice."""
    summary = f"Video summary: {result['description'][:200]}"
    voice = st.session_state.get("tts_voice", "en-US-AriaNeural")
    audio_file = asyncio.run(generate_speech(summary, voice))
    if not audio_file:
        return
    try:
        # Hand the bytes to Streamlit so the temporary file can be removed
        # right away without affecting playback.
        st.audio(Path(audio_file).read_bytes(), format="audio/mpeg")
    finally:
        if os.path.exists(audio_file):
            os.remove(audio_file)


def _render_search_tab(search):
    """Render the text-search tab and return the chosen number of results."""
    st.subheader("Search Videos")

    query = st.text_input("Enter your search query:")
    col1, col2 = st.columns(2)
    with col1:
        search_button = st.button("🔍 Search")
    with col2:
        num_results = st.slider("Number of results:", 1, 10, 5)

    if search_button and query:
        st.session_state['search_results'] = _run_search(search, query, num_results)

    # Results are kept in session state: a click on a nested button reruns
    # the script with search_button False, and the results (and the button
    # itself) must still be rendered for the click to take effect.
    for i, result in enumerate(st.session_state.get('search_results', []), 1):
        with st.expander(f"Result {i}: {result['description'][:100]}...", expanded=i == 1):
            cols = st.columns([2, 1])

            with cols[0]:
                st.markdown("**Full Description:**")
                st.write(result['description'])
                st.markdown(f"**Time Range:** {result['start_time']}s - {result['end_time']}s")
                st.markdown(f"**Views:** {result['views']:,}")

            with cols[1]:
                st.markdown(f"**Relevance Score:** {result['relevance_score']:.2%}")
                _show_video(result)

            if st.button("🔊 Generate Audio Summary", key=f"audio_{i}"):
                _play_audio_summary(result)

    return num_results


def _render_voice_tab(search, num_results):
    """Render the voice-input tab: transcript plus search / GPT-4 / Claude actions."""
    st.subheader("Voice Input")

    voice_input = speech_component()
    if not voice_input:
        return

    # The actions are shown whenever a transcript exists. Gating them on
    # "transcript changed" would hide them on the very rerun that a button
    # click triggers, so the click would never be handled.
    st.session_state['last_voice_input'] = voice_input
    st.markdown("**Transcribed Text:**")
    st.write(voice_input)

    cols = st.columns(3)
    with cols[0]:
        if st.button("🔍 Search Videos"):
            results = _run_search(search, voice_input, num_results)
            for i, result in enumerate(results, 1):
                with st.expander(f"Result {i}: {result['description'][:100]}...", expanded=i == 1):
                    st.write(result['description'])
                    _show_video(result)

    with cols[1]:
        if st.button("🤖 Process with GPT-4"):
            gpt_response = process_with_gpt4(voice_input)
            if gpt_response:
                st.markdown("**GPT-4 Response:**")
                st.write(gpt_response)

    with cols[2]:
        if st.button("🧠 Process with Claude"):
            claude_response = process_with_claude(voice_input)
            if claude_response:
                st.markdown("**Claude Response:**")
                st.write(claude_response)


def _render_history_tab():
    """Render the search-history tab, newest entry first."""
    st.subheader("Search History")

    if st.button("🗑️ Clear History"):
        st.session_state['search_history'] = []
        _rerun()

    for entry in reversed(st.session_state['search_history']):
        with st.expander(f"Query: {entry['query']} ({entry['timestamp']})", expanded=False):
            st.markdown(f"**Original Query:** {entry['query']}")
            st.markdown(f"**Time:** {entry['timestamp']}")

            for j, result in enumerate(entry['results'], 1):
                st.markdown(f"**Result {j}:**")
                st.write(result['description'])
                _show_video(result)


def _render_sidebar():
    """Render the sidebar settings and the search-history download."""
    with st.sidebar:
        st.subheader("⚙️ Configuration")
        st.markdown("**Video Search Settings**")
        st.slider("Default Results:", 1, 10, 5, key="default_results")

        st.markdown("**Voice Settings**")
        # Read back through st.session_state["tts_voice"] when generating audio
        st.selectbox("TTS Voice:",
                     ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
                     key="tts_voice")

        st.markdown("**Model Settings**")
        # NOTE(review): "default_results" and "embedding_model" are stored in
        # session state but nothing in this function set reads them yet.
        st.selectbox("Text Embedding Model:",
                     ["all-MiniLM-L6-v2", "paraphrase-multilingual-MiniLM-L12-v2"],
                     key="embedding_model")

        # default= lets history entries that still hold NumPy scalars serialize
        history_json = json.dumps(
            st.session_state['search_history'],
            indent=2,
            default=_json_default,
        )
        st.download_button(
            "📥 Download Search History",
            data=history_json,
            file_name="search_history.json",
            mime="application/json",
        )


def main():
    """Build the Streamlit page: search, voice-input and history tabs plus the sidebar."""
    st.title("🎥 Video Search with Speech Recognition")

    # Build the search engine once per session instead of on every rerun;
    # constructing VideoSearch loads the embedding model and the dataset.
    if 'video_search' not in st.session_state:
        st.session_state['video_search'] = VideoSearch()
    search = st.session_state['video_search']

    # Create tabs
    tab1, tab2, tab3 = st.tabs(["🔍 Search", "🎙️ Voice Input", "💾 History"])

    with tab1:
        num_results = _render_search_tab(search)

    with tab2:
        _render_voice_tab(search, num_results)

    with tab3:
        _render_history_tab()

    _render_sidebar()


if __name__ == "__main__":
    main()