nniehaus committed on
Commit
a8c7ff3
·
verified ·
1 Parent(s): 47d0657

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -63
app.py CHANGED
@@ -2,13 +2,11 @@ import streamlit as st
2
  import requests
3
  from bs4 import BeautifulSoup
4
  from geopy.geocoders import Nominatim
5
- from urllib.parse import urljoin, urlparse
6
- import re
7
  import os
8
- import pandas as pd
9
  import folium
10
  from streamlit_folium import folium_static
11
  from jsonschema import validate, ValidationError
 
12
 
13
  # Configure environment
14
  DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_KEY")
@@ -39,39 +37,66 @@ RESPONSE_SCHEMA = {
39
  }
40
  }
41
 
 
 
 
 
 
 
 
 
 
 
 
42
  @st.cache_data
43
  def scrape_location_data(query):
44
- """Scrape location data with enhanced error handling"""
45
- sources = {
46
- "Niche": f"https://www.niche.com/places-to-live/search/{query}",
47
- }
48
-
49
  results = []
50
- for source, url in sources.items():
51
- try:
52
- response = requests.get(url, timeout=15)
53
- response.raise_for_status()
54
- soup = BeautifulSoup(response.text, 'html.parser')
55
-
56
- if source == "Niche":
57
- listings = soup.find_all('div', class_='search-results__list__item')
58
- for item in listings[:3]:
59
- results.append({
60
- 'name': item.find('h2').text.strip(),
61
- 'details': item.find('div', class_='search-result-tagline').text.strip(),
62
- 'score': item.find('div', class_='search-result-grade').text.strip()
63
- })
64
-
65
- except Exception as e:
66
- st.error(f"Scraping error for {source}: {str(e)}")
67
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
- return results
 
 
 
 
70
 
71
  def generate_recommendations(preferences):
72
  """Generate recommendations with robust error handling"""
73
  if not DEEPSEEK_API_KEY:
74
- st.error("Missing API key - check environment configuration")
75
  return None
76
 
77
  headers = {
@@ -80,17 +105,14 @@ def generate_recommendations(preferences):
80
  }
81
 
82
  prompt = f"""
83
- Create a neighborhood recommendation report based on these preferences:
84
- {preferences}
85
-
86
- Include these sections:
87
- 1. Top 5 Neighborhood Matches
88
- 2. Hidden Gem Recommendation
89
- 3. Key Amenities Analysis
90
- 4. Commute Times Overview
91
- 5. Safety & Community Insights
92
-
93
- Format with markdown headers and bullet points.
94
  """
95
 
96
  try:
@@ -106,20 +128,18 @@ def generate_recommendations(preferences):
106
  timeout=30
107
  )
108
 
109
- # Validate response status
110
  if response.status_code != 200:
111
  error_msg = response.json().get('error', {}).get('message', 'Unknown API error')
112
  st.error(f"API Error {response.status_code}: {error_msg}")
113
  return None
114
 
115
- # Validate response structure
116
  response_data = response.json()
117
  validate(instance=response_data, schema=RESPONSE_SCHEMA)
118
 
119
  return response_data["choices"][0]["message"]["content"]
120
 
121
  except ValidationError as ve:
122
- st.error(f"Invalid API response format: {str(ve)}")
123
  return None
124
  except Exception as e:
125
  st.error(f"Connection Error: {str(e)}")
@@ -127,46 +147,48 @@ def generate_recommendations(preferences):
127
 
128
  # Streamlit UI
129
  st.set_page_config(layout="wide", page_icon="🏡")
130
- st.title("Neighborhood Matchmaker")
131
 
132
  with st.sidebar:
133
  st.header("Search Preferences")
134
  city = st.text_input("City/Region", "New York, NY")
135
- budget = st.slider("Monthly Housing Budget ($)", 1000, 10000, 3000)
136
- commute = st.selectbox("Max Commute Time", ["15 mins", "30 mins", "45 mins", "1 hour"])
137
- amenities = st.multiselect("Must-Have Amenities", [
138
- "Good Schools", "Parks", "Public Transport",
139
- "Nightlife", "Shopping", "Healthcare"
140
- ])
141
- lifestyle = st.selectbox("Lifestyle Preference", [
142
- "Family-Friendly", "Urban Professional", "Retirement",
143
- "Student", "Remote Worker", "Outdoor Enthusiast"
144
- ])
145
 
146
- if st.button("Find My Neighborhood"):
147
  with st.spinner("Analyzing locations..."):
148
  preferences = {
149
  "city": city,
150
- "budget": f"${budget}/mo",
151
- "max_commute": commute,
152
- "amenities": amenities,
153
  "lifestyle": lifestyle
154
  }
155
 
156
  location_data = scrape_location_data(city)
 
 
 
 
 
157
  report = generate_recommendations(preferences)
158
 
159
  if report:
160
- st.subheader("Your Personalized Neighborhood Report")
161
  st.markdown(report)
162
 
163
  try:
164
- geolocator = Nominatim(user_agent="neighborhood_finder")
165
  location = geolocator.geocode(city)
166
- m = folium.Map(location=[location.latitude, location.longitude], zoom_start=12)
167
- folium_static(m, width=1200, height=500)
 
 
 
 
 
168
  except Exception as e:
169
- st.warning(f"Map visualization error: {str(e)}")
170
 
171
  st.markdown("---")
172
- st.caption("Note: Recommendations generated by AI. Verify with local experts.")
 
2
  import requests
3
  from bs4 import BeautifulSoup
4
  from geopy.geocoders import Nominatim
 
 
5
  import os
 
6
  import folium
7
  from streamlit_folium import folium_static
8
  from jsonschema import validate, ValidationError
9
+ from fake_useragent import UserAgent
10
 
11
  # Configure environment
12
  DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_KEY")
 
37
  }
38
  }
39
 
def get_scraping_headers():
    """Build browser-like HTTP headers for the scraping request.

    The User-Agent value comes from ``UserAgent().chrome`` (fake_useragent);
    every other header is a fixed string.

    Returns:
        dict: Header names mapped to their values, User-Agent first.
    """
    chrome_agent = UserAgent().chrome

    # Start from the generated agent, then append the static headers in the
    # same order the request has always sent them.
    browser_headers = {'User-Agent': chrome_agent}
    browser_headers.update({
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'DNT': '1',
    })
    return browser_headers
50
+
def _select_text(node, selector, default=""):
    """Return the stripped text of the first element under *node* matching *selector*.

    Falls back to *default* when nothing matches. Centralising the lookup
    means each CSS selector is written (and evaluated) exactly once.
    """
    element = node.select_one(selector)
    return element.get_text(strip=True) if element is not None else default


@st.cache_data
def scrape_location_data(query):
    """Scrape up to three place listings for *query* from Niche's search page.

    Args:
        query: Free-text city/region name. It is percent-encoded before being
            placed in the search URL path.

    Returns:
        A list of dicts with ``name``, ``details`` and ``score`` keys. The
        list is empty when the query is blank, when the page looks like a
        bot-protection challenge, when no listings match, or when any error
        occurs (errors are reported through ``st.error``, never raised).
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    if not query or not query.strip():
        return []

    # Encode reserved characters ("/", "?", "#", ...) so user input cannot
    # change the URL structure; commas are left readable.
    search_url = (
        "https://www.niche.com/places-to-live/search/"
        f"{quote(query, safe=',')}"
    )

    try:
        # The context manager releases the session's pooled connections.
        with requests.Session() as session:
            response = session.get(
                search_url,
                headers=get_scraping_headers(),
                timeout=15,
                allow_redirects=True,
            )

        page_html = response.text
        lowered_html = page_html.lower()  # lower-case once, not per keyword

        # Detect bot protection
        # NOTE(review): the empty result returned here goes through
        # st.cache_data as well, so the "try again" advice may be answered
        # from cache - consider a ttl on the decorator; confirm.
        if any(marker in lowered_html for marker in ("captcha", "security check")):
            st.warning("""
            **Manual Verification Required:**
            1. Visit [Niche.com](https://www.niche.com) directly
            2. Complete any security checks
            3. Return here and try again
            """)
            return []

        soup = BeautifulSoup(page_html, 'html.parser')

        # Substring class matches tolerate extra/generated class names.
        listings = soup.select('div[class*="search-results__list__item"]')

        # The grade selector previously carried a stray `")` suffix, which is
        # invalid CSS and made every scrape fall into the except branch.
        return [
            {
                'name': _select_text(item, 'h2', 'N/A'),
                'details': _select_text(item, 'div[class*="tagline"]'),
                'score': _select_text(item, 'div[class*="grade"]'),
            }
            for item in listings[:3]
        ]

    except Exception as e:
        # Best-effort scraper: report the problem in the UI and return nothing.
        st.error(f"Data retrieval issue: {str(e)}")
        return []
95
 
96
  def generate_recommendations(preferences):
97
  """Generate recommendations with robust error handling"""
98
  if not DEEPSEEK_API_KEY:
99
+ st.error("Missing API key configuration")
100
  return None
101
 
102
  headers = {
 
105
  }
106
 
107
  prompt = f"""
108
+ Analyze these neighborhood preferences: {preferences}
109
+ Create a detailed report with:
110
+ - Top 3 matches
111
+ - 1 hidden gem
112
+ - Amenity analysis
113
+ - Safety insights
114
+ - Price comparisons
115
+ Format with markdown sections
 
 
 
116
  """
117
 
118
  try:
 
128
  timeout=30
129
  )
130
 
 
131
  if response.status_code != 200:
132
  error_msg = response.json().get('error', {}).get('message', 'Unknown API error')
133
  st.error(f"API Error {response.status_code}: {error_msg}")
134
  return None
135
 
 
136
  response_data = response.json()
137
  validate(instance=response_data, schema=RESPONSE_SCHEMA)
138
 
139
  return response_data["choices"][0]["message"]["content"]
140
 
141
  except ValidationError as ve:
142
+ st.error(f"Invalid API response: {str(ve)}")
143
  return None
144
  except Exception as e:
145
  st.error(f"Connection Error: {str(e)}")
 
147
 
# Streamlit UI
# NOTE(review): the nesting below is reconstructed from a flattened diff view
# (indentation was not preserved) - confirm against the real app.py.
st.set_page_config(layout="wide", page_icon="🏡")
st.title("Neighborhood Matchmaker Pro")

# Sidebar: collect the search inputs.
with st.sidebar:
    st.header("Search Preferences")
    city = st.text_input("City/Region", "New York, NY")
    budget = st.slider("Monthly Budget ($)", 1000, 10000, 3000)
    commute_choices = ["15 mins", "30 mins", "45 mins"]
    commute = st.selectbox("Max Commute", commute_choices)
    lifestyle_choices = ["Family", "Urban", "Remote"]
    lifestyle = st.selectbox("Lifestyle", lifestyle_choices)

# Main flow: runs only on the rerun triggered by the button press.
if st.button("Find Neighborhoods"):
    with st.spinner("Analyzing locations..."):
        preferences = dict(
            city=city,
            budget=budget,
            commute=commute,
            lifestyle=lifestyle,
        )

        location_data = scrape_location_data(city)

        if not location_data:
            st.info("Using alternative data sources...")
            # Implement fallback scraping here

        report = generate_recommendations(preferences)

        if report:
            st.subheader("Custom Neighborhood Report")
            st.markdown(report)

            # The map is optional: any geocoding/rendering failure is
            # downgraded to a warning so the report stays visible.
            try:
                place = Nominatim(user_agent="geo_app_v2").geocode(city)
                if place:
                    area_map = folium.Map(
                        location=[place.latitude, place.longitude],
                        zoom_start=12,
                        tiles="CartoDB positron",
                    )
                    folium_static(area_map, width=1000, height=500)
            except Exception as exc:
                st.warning(f"Map error: {str(exc)}")

st.markdown("---")
st.caption("Note: Results may vary based on data availability")