Files
xkwsoftlist/test_single_url.py
2026-02-09 08:59:54 +08:00

61 lines
2.3 KiB
Python

import requests
from bs4 import BeautifulSoup
import re
# Browser-like request headers, sent with the Session request so that the
# request resembles one made by a real desktop browser.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/120.0.0.0 Safari/537.36'
    ),
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8'
    ),
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Referer': 'https://www.zxxk.com/',
}
def check_url(url):
    """Fetch ``url`` and print a diagnostic report of its ``other-info`` block.

    The page is requested with the module-level ``HEADERS`` and a 10-second
    timeout. The report, written to stdout, contains:

    * the HTTP status code;
    * a notice if the body looks like a JavaScript anti-bot challenge
      (it contains both ``check()`` and ``<script``), in which case nothing
      else is inspected;
    * otherwise, whether ``<div class="other-info">`` exists and, inside it,
      whether the ``time``, ``views`` and ``download`` divs exist, together
      with their stripped text;
    * if ``other-info`` is missing, the page title and the first 200
      characters of ``<body>`` to help analyse what was returned instead.

    Args:
        url: Address of the page to inspect.

    Returns:
        None. Any exception raised while fetching or parsing is caught and
        printed as ``Error: ...`` instead of being propagated.
    """
    print(f"Checking URL: {url}")
    try:
        # Use the session as a context manager so its pooled connections are
        # released even when the request fails.
        with requests.Session() as session:
            response = session.get(url, headers=HEADERS, timeout=10)
            print(f"Status Code: {response.status_code}")
            # Read the decoded body once instead of once per check.
            html = response.text

        if "check()" in html and "<script" in html:
            print("--- RESULT: Blocked by JavaScript Anti-Bot Challenge ---")
            return

        soup = BeautifulSoup(html, 'html.parser')

        # Look for the other-info node.
        other_info = soup.find('div', class_='other-info')
        if not other_info:
            print("--- 'other-info' node NOT found ---")
            # Print part of the page so the unexpected response can be analysed.
            print("Page title:", soup.title.string if soup.title else "No title")
            print("First 200 chars of body:", str(soup.body)[:200] if soup.body else "No body")
            return

        print("--- Found 'other-info' node ---")
        print(f"Content: {other_info.prettify()[:500]}...")  # first 500 characters

        # Each entry is (label used in the report, CSS class of the child div).
        for label, css_class in (("Time", "time"), ("Views", "views"), ("Download", "download")):
            node = other_info.find('div', class_=css_class)
            if node:
                print(f"{label} node found: {node.get_text(strip=True)}")
            else:
                print(f"{label} node NOT found")
    except Exception as e:
        # Best-effort diagnostic tool: report the failure and carry on.
        print(f"Error: {e}")
if __name__ == "__main__":
    # When run as a script, check one fixed resource page.
    sample_url = "https://www.zxxk.com/soft/38837976.html"
    check_url(sample_url)