61 lines
2.3 KiB
Python
61 lines
2.3 KiB
Python
import requests
|
|
from bs4 import BeautifulSoup
|
|
import re
|
|
|
|
# Browser-like request headers, sent with the Session request so the fetch
# resembles a visit from a real desktop browser.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Referer": "https://www.zxxk.com/",
}
|
|
|
|
def _print_child_nodes(other_info):
    """Print the text of the time/views/download divs inside *other_info*."""
    for label, css_class in (("Time", "time"), ("Views", "views"), ("Download", "download")):
        node = other_info.find('div', class_=css_class)
        if node:
            print(f"{label} node found: {node.get_text(strip=True)}")
        else:
            print(f"{label} node NOT found")


def _print_missing_node_diagnostics(soup):
    """Print the page title and the start of <body> to help analyse the page."""
    print("--- 'other-info' node NOT found ---")
    print("Page title:", soup.title.string if soup.title else "No title")
    print("First 200 chars of body:", str(soup.body)[:200] if soup.body else "No body")


def check_url(url):
    """Fetch *url* and print a diagnostic report about its ``other-info`` node.

    Everything is reported on stdout: the HTTP status code, whether the
    response looks like a JavaScript anti-bot challenge, and, when a
    ``<div class="other-info">`` element is present, a preview of it plus the
    text of its ``time``, ``views`` and ``download`` child divs. When that
    element is absent, the page title and the first 200 characters of
    ``<body>`` are printed instead.

    Args:
        url: Address of the page to inspect.

    Returns:
        None. Exceptions raised while fetching or parsing are caught and
        printed as ``Error: ...`` instead of being propagated.
    """
    print(f"Checking URL: {url}")
    # The context manager closes the session on every exit path, including
    # the early return below and the error path.
    with requests.Session() as session:
        try:
            response = session.get(url, headers=HEADERS, timeout=10)
            print(f"Status Code: {response.status_code}")

            # Read the decoded body once and reuse it for every check below.
            html = response.text

            # A page containing a script tag and a call to check() is treated
            # as the anti-bot challenge page rather than real content.
            if "check()" in html and "<script" in html:
                print("--- RESULT: Blocked by JavaScript Anti-Bot Challenge ---")
                return

            soup = BeautifulSoup(html, 'html.parser')

            # Look for the other-info node.
            other_info = soup.find('div', class_='other-info')
            if other_info:
                print("--- Found 'other-info' node ---")
                # Only the first 500 characters are printed as a preview.
                print(f"Content: {other_info.prettify()[:500]}...")
                _print_child_nodes(other_info)
            else:
                _print_missing_node_diagnostics(soup)
        except Exception as e:
            # Deliberately broad: this is a best-effort diagnostic, so any
            # failure is reported rather than allowed to abort the script.
            print(f"Error: {e}")
|
|
|
|
# Script entry point: run the diagnostic against one sample resource page.
if __name__ == "__main__":
    sample_url = "https://www.zxxk.com/soft/38837976.html"
    check_url(sample_url)
|