nikhildsst committed on
Commit
15f82a0
·
verified ·
1 Parent(s): 5baeddd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -0
app.py CHANGED
@@ -17,6 +17,48 @@ import numpy as np
17
  import shutil
18
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def scrape_courses_with_selenium(url, limit=50):
21
  options = Options()
22
  options.headless = True # Headless browsing
 
17
  import shutil
18
 
19
 
20
# Install Google Chrome and a ChromeDriver for it at import time.
# These commands run unconditionally (no "already installed" check) and
# assume a Debian-style environment with wget, apt-get and unzip available.
os.system('wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb')
os.system('apt-get update && apt-get install -y ./google-chrome-stable_current_amd64.deb')
# The version lookup and the download must share ONE shell: every os.system()
# call spawns a fresh shell, so a variable assigned in one call is gone by the
# next and ${CHROME_VERSION} would expand to an empty string in the URL.
# NOTE(review): the URL is built from the major version plus ".0"; confirm
# this matches how the ChromeDriver download location names its versions for
# the Chrome release actually being installed.
os.system(
    'CHROME_VERSION=$(google-chrome --version | awk \'{print $3}\' | cut -d \'.\' -f 1) && '
    'wget https://chromedriver.storage.googleapis.com/${CHROME_VERSION}.0/chromedriver_linux64.zip'
)
os.system('unzip chromedriver_linux64.zip')
os.system('mv chromedriver /usr/bin/chromedriver && chmod +x /usr/bin/chromedriver')
27
+
28
def check_chrome_installation() -> bool:
    """Report whether a runnable Google Chrome binary can be found.

    Each candidate command name is run with ``--version``; the first one that
    starts and exits with status 0 counts as an installed Chrome.  The version
    text itself is captured but not returned.

    Returns:
        True if ``google-chrome`` or ``google-chrome-stable`` ran
        successfully, otherwise False.
    """
    # Try the default command name first, then the "-stable" name.
    for binary in ('google-chrome', 'google-chrome-stable'):
        try:
            result = subprocess.run([binary, '--version'],
                                    capture_output=True,
                                    text=True)
        except (FileNotFoundError, PermissionError):
            # Command is missing or not executable: move on to the next name.
            continue
        if result.returncode == 0:
            return True
        # A non-zero exit also falls through to the next candidate instead of
        # giving up, so a broken 'google-chrome' does not hide a working
        # 'google-chrome-stable'.
    return False
48
+
49
def setup_chrome_options():
    """Build the Options object carrying every Chrome command-line flag used here.

    Returns:
        A new ``Options`` instance with the flags below added in order.
    """
    flags = (
        '--headless=new',  # Updated headless argument
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--disable-software-rasterizer',
        '--disable-extensions',
        '--disable-setuid-sandbox',
    )
    opts = Options()
    for flag in flags:
        opts.add_argument(flag)
    return opts
60
+
61
+
62
  def scrape_courses_with_selenium(url, limit=50):
63
  options = Options()
64
  options.headless = True # Headless browsing