awacke1 committed on
Commit
8630bc3
·
verified ·
1 Parent(s): 832c41b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -43
app.py CHANGED
@@ -58,72 +58,101 @@ class VideoSearch:
58
  def fetch_dataset_rows(self):
59
  """Fetch dataset from Hugging Face API with debug and caching"""
60
  try:
61
- # First try to load from local cache
62
- cache_file = "dataset_cache.json"
63
- if os.path.exists(cache_file):
64
- st.info("Loading from cache...")
65
- with open(cache_file, 'r', encoding='utf-8') as f:
66
- data = json.load(f)
67
- return pd.DataFrame(data)
68
-
69
  st.info("Fetching from Hugging Face API...")
70
  url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
71
 
72
- # Add debug output
73
- st.write(f"Requesting URL: {url}")
74
-
75
  response = requests.get(url, timeout=30)
76
  st.write(f"Response status: {response.status_code}")
77
 
78
  if response.status_code == 200:
79
  data = response.json()
80
 
81
- # Debug output
82
- st.write("Response structure:", list(data.keys()))
83
-
84
  if 'rows' in data:
85
- rows = data['rows']
86
-
87
- # Cache the response
88
- with open(cache_file, 'w', encoding='utf-8') as f:
89
- json.dump(rows, f)
90
 
91
- df = pd.DataFrame(rows)
92
 
93
  # Debug output
94
- st.write("DataFrame columns:", list(df.columns))
95
  st.write("Number of rows:", len(df))
96
 
97
  return df
98
  else:
99
  st.error("No 'rows' found in API response")
100
- st.write("API Response:", data)
101
-
102
- # Try loading example data
103
- example_file = "example_data.json"
104
- if os.path.exists(example_file):
105
- st.info("Loading example data...")
106
- with open(example_file, 'r', encoding='utf-8') as f:
107
- example_data = json.load(f)
108
- return pd.DataFrame(example_data)
109
-
110
- return None
111
  else:
112
  st.error(f"API request failed with status code: {response.status_code}")
113
- if response.status_code == 404:
114
- st.error("Dataset not found - check the dataset name and configuration")
115
- try:
116
- error_details = response.json()
117
- st.write("Error details:", error_details)
118
- except:
119
- st.write("Raw response:", response.text)
120
- return None
121
 
122
  except Exception as e:
123
  st.error(f"Error fetching dataset: {str(e)}")
124
- import traceback
125
- st.write("Traceback:", traceback.format_exc())
126
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  def load_dataset(self):
129
  try:
 
58
  def fetch_dataset_rows(self):
59
  """Fetch dataset from Hugging Face API with debug and caching"""
60
  try:
 
 
 
 
 
 
 
 
61
  st.info("Fetching from Hugging Face API...")
62
  url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
63
 
 
 
 
64
  response = requests.get(url, timeout=30)
65
  st.write(f"Response status: {response.status_code}")
66
 
67
  if response.status_code == 200:
68
  data = response.json()
69
 
 
 
 
70
  if 'rows' in data:
71
+ # Extract actual row data from the nested structure
72
+ processed_rows = []
73
+ for row_data in data['rows']:
74
+ if 'row' in row_data: # Access the nested 'row' data
75
+ processed_rows.append(row_data['row'])
76
 
77
+ df = pd.DataFrame(processed_rows)
78
 
79
  # Debug output
80
+ st.write("DataFrame columns after processing:", list(df.columns))
81
  st.write("Number of rows:", len(df))
82
 
83
  return df
84
  else:
85
  st.error("No 'rows' found in API response")
86
+ st.write("Raw API Response:", data)
87
+ return self.load_example_data()
 
 
 
 
 
 
 
 
 
88
  else:
89
  st.error(f"API request failed with status code: {response.status_code}")
90
+ return self.load_example_data()
 
 
 
 
 
 
 
91
 
92
  except Exception as e:
93
  st.error(f"Error fetching dataset: {str(e)}")
94
+ return self.load_example_data()
95
+
96
+ def load_example_data(self):
97
+ """Load example data as fallback"""
98
+ example_data = [
99
+ {
100
+ "video_id": "cd21da96-fcca-4c94-a60f-0b1e4e1e29fc",
101
+ "youtube_id": "IO-vwtyicn4",
102
+ "description": "This video shows a close-up of an ancient text carved into a surface, with the text appearing to be in a cursive script.",
103
+ "views": 45489,
104
+ "start_time": 1452,
105
+ "end_time": 1458,
106
+ "video_embed": [0.014160037972033024, -0.003111184574663639, -0.016604168340563774],
107
+ "description_embed": [-0.05835828185081482, 0.02589797042310238, 0.11952091753482819]
108
+ },
109
+ {
110
+ "video_id": "a8ebde7d-d717-4c1e-8be4-bdb4bc0c544f",
111
+ "youtube_id": "mo4rEyF7gTE",
112
+ "description": "This video shows a close-up view of a classical architectural structure, featuring stone statues with ornate details.",
113
+ "views": 4468,
114
+ "start_time": 318,
115
+ "end_time": 324,
116
+ "video_embed": [0.015160037972033024, -0.004111184574663639, -0.017604168340563774],
117
+ "description_embed": [-0.06835828185081482, 0.03589797042310238, 0.12952091753482819]
118
+ },
119
+ {
120
+ "video_id": "d1be64a6-22e2-4fbd-a176-20749e7c3d8a",
121
+ "youtube_id": "IO-vwtyicn4",
122
+ "description": "This video shows a weathered ancient painting depicting figures in classical style with vibrant colors preserved.",
123
+ "views": 45489,
124
+ "start_time": 1698,
125
+ "end_time": 1704,
126
+ "video_embed": [0.016160037972033024, -0.005111184574663639, -0.018604168340563774],
127
+ "description_embed": [-0.07835828185081482, 0.04589797042310238, 0.13952091753482819]
128
+ }
129
+ ]
130
+ return pd.DataFrame(example_data)
131
+
132
+ def prepare_features(self):
133
+ """Prepare and cache embeddings"""
134
+ try:
135
+ if 'video_embed' not in self.dataset.columns:
136
+ st.warning("Using example data embeddings")
137
+ self.dataset = self.load_example_data()
138
+
139
+ # Convert string representations of embeddings back to numpy arrays
140
+ try:
141
+ self.video_embeds = np.array([json.loads(e) if isinstance(e, str) else e
142
+ for e in self.dataset.video_embed])
143
+ self.text_embeds = np.array([json.loads(e) if isinstance(e, str) else e
144
+ for e in self.dataset.description_embed])
145
+ except Exception as e:
146
+ st.error(f"Error converting embeddings: {e}")
147
+ num_rows = len(self.dataset)
148
+ self.video_embeds = np.random.randn(num_rows, 384)
149
+ self.text_embeds = np.random.randn(num_rows, 384)
150
+ except Exception as e:
151
+ st.error(f"Error preparing features: {e}")
152
+ # Create random embeddings as fallback
153
+ num_rows = len(self.dataset)
154
+ self.video_embeds = np.random.randn(num_rows, 384)
155
+ self.text_embeds = np.random.randn(num_rows, 384)
156
 
157
  def load_dataset(self):
158
  try: