Update app.py
Browse files
app.py
CHANGED
@@ -17,6 +17,48 @@ import numpy as np
|
|
17 |
import shutil
|
18 |
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
def scrape_courses_with_selenium(url, limit=50):
|
21 |
options = Options()
|
22 |
options.headless = True # Headless browsing
|
|
|
17 |
import shutil
|
18 |
|
19 |
|
20 |
+
# Install Google Chrome and a matching ChromeDriver if not already installed.
# The install is skipped when the binary is already on the PATH so that
# re-importing this module does not download and reinstall everything again.
if shutil.which('google-chrome') is None:
    os.system('wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb')
    os.system('apt-get update && apt-get install -y ./google-chrome-stable_current_amd64.deb')

if shutil.which('chromedriver') is None:
    # Every os.system() call runs in its own shell, so a variable assigned in
    # one call no longer exists in the next. The version lookup and every step
    # that depends on it are therefore chained into ONE shell command.
    os.system(
        # Major version of the installed browser, e.g. "126".
        "CHROME_VERSION=$(google-chrome --version | awk '{print $3}' | cut -d '.' -f 1) && "
        # Resolve the full driver version published for that major version.
        # The legacy chromedriver.storage.googleapis.com bucket stops at
        # Chrome 114; newer drivers are published via Chrome for Testing.
        "DRIVER_VERSION=$(wget -qO- "
        "https://googlechromelabs.github.io/chrome-for-testing/LATEST_RELEASE_${CHROME_VERSION}) && "
        "wget -O chromedriver-linux64.zip "
        "https://storage.googleapis.com/chrome-for-testing-public/"
        "${DRIVER_VERSION}/linux64/chromedriver-linux64.zip && "
        # -o: overwrite leftovers from a previous, partially completed run.
        "unzip -o chromedriver-linux64.zip && "
        # The Chrome for Testing archive unpacks into a chromedriver-linux64/ folder.
        "mv chromedriver-linux64/chromedriver /usr/bin/chromedriver && "
        "chmod +x /usr/bin/chromedriver"
    )
|
27 |
+
|
28 |
+
def check_chrome_installation():
    """Check whether a working Google Chrome binary is available.

    Runs ``<binary> --version`` for ``google-chrome`` and then for
    ``google-chrome-stable``. The first candidate that can be launched and
    exits with status 0 counts as an installed Chrome.

    Returns:
        bool: True if one of the candidates ran successfully, False otherwise.
    """
    for binary in ('google-chrome', 'google-chrome-stable'):
        try:
            result = subprocess.run([binary, '--version'],
                                    capture_output=True,
                                    text=True)
        except OSError:
            # Binary is missing (FileNotFoundError) or cannot be launched
            # (e.g. PermissionError): fall through to the next candidate.
            continue
        if result.returncode == 0:
            return True
        # A non-zero exit also falls through, so a broken first candidate
        # does not hide a working second one.
    return False
|
48 |
+
|
49 |
+
def setup_chrome_options():
    """Build and return a Chrome ``Options`` object with the launch flags set.

    Returns:
        Options: a new options object carrying every flag listed below,
        added in the listed order.
    """
    launch_flags = (
        '--headless=new',  # Updated headless argument
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--disable-software-rasterizer',
        '--disable-extensions',
        '--disable-setuid-sandbox',
    )
    opts = Options()
    for flag in launch_flags:
        opts.add_argument(flag)
    return opts
|
60 |
+
|
61 |
+
|
62 |
def scrape_courses_with_selenium(url, limit=50):
|
63 |
options = Options()
|
64 |
options.headless = True # Headless browsing
|