awacke1 committed
Commit cc67713 · verified · 1 Parent(s): 1ba134d

Update app.py

Files changed (1)
  1. app.py +69 -69
app.py CHANGED
@@ -11,8 +11,6 @@ from datetime import datetime
 import edge_tts
 import asyncio
 import base64
-from openai import OpenAI
-import anthropic
 import streamlit.components.v1 as components
 
 # Page configuration
@@ -23,17 +21,11 @@ st.set_page_config(
 )
 
 # Initialize session state
-if 'messages' not in st.session_state:
-    st.session_state['messages'] = []
 if 'search_history' not in st.session_state:
     st.session_state['search_history'] = []
 if 'last_voice_input' not in st.session_state:
     st.session_state['last_voice_input'] = ""
 
-# Load environment variables
-openai_client = OpenAI()
-claude_client = anthropic.Anthropic()
-
 # Initialize the speech component
 speech_component = components.declare_component("speech_recognition", path="mycomponent")
 
@@ -42,12 +34,53 @@ class VideoSearch:
         self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
         self.load_dataset()
 
+    def fetch_dataset_rows(self):
+        """Fetch dataset from Hugging Face API"""
+        import requests
+
+        # Fetch first rows from the dataset
+        url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
+        response = requests.get(url)
+
+        if response.status_code == 200:
+            data = response.json()
+            # Extract the rows from the response
+            rows = data.get('rows', [])
+            return pd.DataFrame(rows)
+        else:
+            st.error(f"Error fetching dataset: {response.status_code}")
+            return None
+
+    def get_dataset_splits(self):
+        """Get available dataset splits"""
+        import requests
+
+        url = "https://datasets-server.huggingface.co/splits?dataset=omegalabsinc%2Fomega-multimodal"
+        response = requests.get(url)
+
+        if response.status_code == 200:
+            splits_data = response.json()
+            return splits_data
+        else:
+            st.error(f"Error fetching splits: {response.status_code}")
+            return None
+
     def load_dataset(self):
         """Load the Omega Multimodal dataset"""
         try:
-            # Load dataset from Hugging Face
-            self.dataset = pd.read_csv("paste.txt")
-            self.prepare_features()
+            # Fetch dataset from Hugging Face API
+            self.dataset = self.fetch_dataset_rows()
+
+            if self.dataset is not None:
+                # Get dataset splits info
+                splits_info = self.get_dataset_splits()
+                if splits_info:
+                    st.sidebar.write("Available splits:", splits_info)
+
+                self.prepare_features()
+            else:
+                self.create_dummy_data()
+
         except Exception as e:
             st.error(f"Error loading dataset: {e}")
             self.create_dummy_data()
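Note: the datasets-server `first-rows` endpoint wraps each record as `{"row_idx": ..., "row": {...}, "truncated_cells": [...]}`, so `pd.DataFrame(rows)` above likely yields `row_idx`/`row`/`truncated_cells` columns rather than the dataset's own fields (`video_embed`, `description`, ...). A minimal sketch of the unwrapping this probably needs; the URL is the one used in the commit, the flattening step is the addition:

```python
import pandas as pd
import requests

url = ("https://datasets-server.huggingface.co/first-rows"
       "?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train")
resp = requests.get(url, timeout=30)
resp.raise_for_status()

# Each entry looks like {"row_idx": 0, "row": {...}, "truncated_cells": []};
# unwrap "row" so the DataFrame gets the dataset's actual columns.
records = [item["row"] for item in resp.json().get("rows", [])]
df = pd.DataFrame(records)
```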
@@ -55,16 +88,23 @@ class VideoSearch:
     def prepare_features(self):
         """Prepare and cache embeddings"""
         # Convert string representations of embeddings back to numpy arrays
-        self.video_embeds = np.array([json.loads(e) if isinstance(e, str) else e
-                                    for e in self.dataset.video_embed])
-        self.text_embeds = np.array([json.loads(e) if isinstance(e, str) else e
-                                   for e in self.dataset.description_embed])
+        try:
+            self.video_embeds = np.array([json.loads(e) if isinstance(e, str) else e
+                                        for e in self.dataset.video_embed])
+            self.text_embeds = np.array([json.loads(e) if isinstance(e, str) else e
+                                       for e in self.dataset.description_embed])
+        except Exception as e:
+            st.error(f"Error preparing features: {e}")
+            # Create random embeddings as fallback
+            num_rows = len(self.dataset)
+            self.video_embeds = np.random.randn(num_rows, 384)
+            self.text_embeds = np.random.randn(num_rows, 384)
 
     def create_dummy_data(self):
         """Create dummy data for testing"""
         self.dataset = pd.DataFrame({
             'video_id': [f'video_{i}' for i in range(10)],
-            'youtube_id': ['dQw4w9WgXcQ'] * 10,  # Example YouTube ID
+            'youtube_id': ['dQw4w9WgXcQ'] * 10,
             'description': ['Sample video description'] * 10,
             'views': [1000] * 10,
             'start_time': [0] * 10,
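The hard-coded 384 in the fallback matches the embedding width of all-MiniLM-L6-v2. Rather than repeating the magic number, the dimension can be read off the model itself; a small sketch using the sentence-transformers API:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')
dim = model.get_sentence_embedding_dimension()  # 384 for this model
```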
@@ -74,6 +114,7 @@ class VideoSearch:
         self.video_embeds = np.random.randn(10, 384)  # Match model dimensions
         self.text_embeds = np.random.randn(10, 384)
 
+
     def search(self, query, top_k=5):
         """Search videos using query"""
         query_embedding = self.text_model.encode([query])[0]
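The diff shows only the head of `search()`. With sentence-transformer embeddings like these, ranking is typically cosine similarity between the query vector and the cached row embeddings; a generic sketch of that pattern, not necessarily the exact implementation here:

```python
import numpy as np

def cosine_top_k(query_vec: np.ndarray, embeds: np.ndarray, top_k: int = 5):
    """Return indices and scores of the top_k rows most similar to the query."""
    q = query_vec / np.linalg.norm(query_vec)
    m = embeds / np.linalg.norm(embeds, axis=1, keepdims=True)
    sims = m @ q                      # cosine similarity per row
    idx = np.argsort(-sims)[:top_k]   # best-first
    return idx, sims[idx]
```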
@@ -112,31 +153,6 @@ async def generate_speech(text, voice="en-US-AriaNeural"):
     await communicate.save(audio_file)
     return audio_file
 
-def process_with_gpt4(prompt):
-    """Process text with GPT-4"""
-    try:
-        response = openai_client.chat.completions.create(
-            model="gpt-4",
-            messages=[{"role": "user", "content": prompt}]
-        )
-        return response.choices[0].message.content
-    except Exception as e:
-        st.error(f"Error with GPT-4: {e}")
-        return None
-
-def process_with_claude(prompt):
-    """Process text with Claude"""
-    try:
-        response = claude_client.messages.create(
-            model="claude-3-sonnet-20240229",
-            max_tokens=1000,
-            messages=[{"role": "user", "content": prompt}]
-        )
-        return response.content[0].text
-    except Exception as e:
-        st.error(f"Error with Claude: {e}")
-        return None
-
 def main():
     st.title("🎥 Video Search with Speech Recognition")
 
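For context, only the tail of `generate_speech` is visible above; it follows the standard edge-tts pattern. A self-contained sketch with a hypothetical output filename, showing how a synchronous Streamlit script can drive the coroutine:

```python
import asyncio
import edge_tts

async def generate_speech(text: str, voice: str = "en-US-AriaNeural") -> str:
    communicate = edge_tts.Communicate(text, voice)
    audio_file = "speech_output.mp3"   # hypothetical path; the app's may differ
    await communicate.save(audio_file)
    return audio_file

# Streamlit scripts run synchronously, so run the coroutine explicitly:
path = asyncio.run(generate_speech("Hello from edge-tts"))
```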
@@ -202,34 +218,18 @@ def main():
             st.markdown("**Transcribed Text:**")
             st.write(voice_input)
 
-            cols = st.columns(3)
-            with cols[0]:
-                if st.button("🔍 Search Videos"):
-                    results = search.search(voice_input, num_results)
-                    st.session_state['search_history'].append({
-                        'query': voice_input,
-                        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                        'results': results
-                    })
-                    for i, result in enumerate(results, 1):
-                        with st.expander(f"Result {i}: {result['description'][:100]}...", expanded=i==1):
-                            st.write(result['description'])
-                            if result['youtube_id']:
-                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
-
-            with cols[1]:
-                if st.button("🤖 Process with GPT-4"):
-                    gpt_response = process_with_gpt4(voice_input)
-                    if gpt_response:
-                        st.markdown("**GPT-4 Response:**")
-                        st.write(gpt_response)
-
-            with cols[2]:
-                if st.button("🧠 Process with Claude"):
-                    claude_response = process_with_claude(voice_input)
-                    if claude_response:
-                        st.markdown("**Claude Response:**")
-                        st.write(claude_response)
+            if st.button("🔍 Search Videos"):
+                results = search.search(voice_input, num_results)
+                st.session_state['search_history'].append({
+                    'query': voice_input,
+                    'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    'results': results
+                })
+                for i, result in enumerate(results, 1):
+                    with st.expander(f"Result {i}: {result['description'][:100]}...", expanded=i==1):
+                        st.write(result['description'])
+                        if result['youtube_id']:
+                            st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
 
     with tab3:
         st.subheader("Search History")
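A small alternative to hand-building the `&t=` query string: `st.video` accepts a `start_time` argument; whether it takes effect for YouTube URLs depends on the installed Streamlit version, so this is a sketch under that assumption:

```python
# Assumes st.video's start_time parameter applies to YouTube embeds
# in the Streamlit version in use.
st.video(f"https://youtube.com/watch?v={result['youtube_id']}",
         start_time=int(result['start_time']))
```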
 