diff --git a/scrape_to_sqlite.py b/scrape_to_sqlite.py index b293fd8..7bc2c75 100755 --- a/scrape_to_sqlite.py +++ b/scrape_to_sqlite.py @@ -5,6 +5,7 @@ import time import random import sqlite3 import datetime +import shutil import threading from selenium import webdriver from selenium.webdriver.chrome.service import Service @@ -35,7 +36,19 @@ def get_driver(): chrome_options.add_experimental_option("prefs", prefs) chrome_options.add_argument("user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") - service = Service(ChromeDriverManager().install()) + # 优先使用本地 chromedriver:环境变量 CHROMEDRIVER_PATH 或 PATH 中的 chromedriver + chromedriver_path = os.environ.get('CHROMEDRIVER_PATH') or shutil.which('chromedriver') + if chromedriver_path: + try: + print(f"Using local chromedriver: {chromedriver_path}") + service = Service(chromedriver_path) + except Exception as e: + print(f"Local chromedriver at {chromedriver_path} failed to start: {e}\nFalling back to webdriver_manager...") + service = Service(ChromeDriverManager().install()) + else: + print("Local chromedriver not found; downloading via webdriver_manager...") + service = Service(ChromeDriverManager().install()) + driver = webdriver.Chrome(service=service, options=chrome_options) return driver @@ -80,7 +93,9 @@ def worker(thread_idx, start_id, end_id): driver = None try: + print(f"Thread-{thread_idx} initializing webdriver...") driver = get_driver() + print(f"Thread-{thread_idx} webdriver initialized") wait = WebDriverWait(driver, 95) # 稍微增加等待时间 while current_id <= end_id: @@ -150,7 +165,9 @@ def worker(thread_idx, start_id, end_id): #current_id += 1 except Exception as e: + import traceback print(f"Thread-{thread_idx} fatal error: {e}") + traceback.print_exc() finally: if driver: driver.quit()