# xkwsoftlist/test_single_url.py

import requests
from bs4 import BeautifulSoup
import re

# Browser-like request headers, sent so the request resembles one coming
# from a real desktop Chrome browser.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Referer": "https://www.zxxk.com/",
}

def check_url(url):
    """Fetch *url* and print a diagnostic report about its ``other-info`` block.

    The page is requested with the module-level ``HEADERS`` and a 10-second
    timeout. Everything is reported on stdout:

    * the HTTP status code;
    * a "blocked" notice (followed by an early return) when the body contains
      both ``check()`` and ``<script``, which this script treats as the
      signature of a JavaScript anti-bot challenge page;
    * otherwise, whether a ``<div class="other-info">`` exists and, inside
      it, whether the ``time``, ``views`` and ``download`` divs exist,
      together with their stripped text;
    * when ``other-info`` is missing, the page title and the first 200
      characters of ``<body>`` to help analyse what was actually served.

    Args:
        url: Address of the page to inspect.

    Returns:
        None. Any exception raised while fetching or parsing is caught and
        printed as ``Error: ...`` rather than propagated.
    """
    print(f"Checking URL: {url}")
    try:
        # The context manager closes the session, releasing its pooled
        # connections even when the request raises.
        with requests.Session() as session:
            response = session.get(url, headers=HEADERS, timeout=10)
            print(f"Status Code: {response.status_code}")
            # Decode the body once and reuse it for every check below.
            html = response.text

        if "check()" in html and "<script" in html:
            print("--- RESULT: Blocked by JavaScript Anti-Bot Challenge ---")
            return

        soup = BeautifulSoup(html, 'html.parser')

        # Locate the other-info node.
        other_info = soup.find('div', class_='other-info')
        if other_info is None:
            print("--- 'other-info' node NOT found ---")
            # Print part of the page source for analysis.
            print("Page title:", soup.title.string if soup.title else "No title")
            print("First 200 chars of body:", str(soup.body)[:200] if soup.body else "No body")
            return

        print("--- Found 'other-info' node ---")
        # Only the first 500 characters are printed to keep the output short.
        print(f"Content: {other_info.prettify()[:500]}...")

        # (label used in the report, CSS class of the child div to look for)
        expected_children = (
            ("Time", "time"),
            ("Views", "views"),
            ("Download", "download"),
        )
        for label, css_class in expected_children:
            node = other_info.find('div', class_=css_class)
            if node is not None:
                print(f"{label} node found: {node.get_text(strip=True)}")
            else:
                print(f"{label} node NOT found")

    except Exception as e:
        # Diagnostic script boundary: report the failure instead of crashing.
        print(f"Error: {e}")

# Script entry point: run the diagnostic against one sample resource page.
if __name__ == "__main__":
    sample_url = "https://www.zxxk.com/soft/38837976.html"
    check_url(sample_url)