分步式页面布局,首页页面设计实现初稿
This commit is contained in:
56
2.1版本/services/crawler_82.py
Normal file
56
2.1版本/services/crawler_82.py
Normal file
@ -0,0 +1,56 @@
|
||||
import requests
|
||||
import json
|
||||
import logging
|
||||
from lxml import etree
|
||||
from config import Config
|
||||
|
||||
# Load this crawler's settings from the shared project config under the
# site key "82". The code below reads CONFIG['base_url'] and CONFIG['login']
# (login form data); other keys, if any, are unused here.
CONFIG = Config.CRAWLER_CONFIG["82"]
|
||||
|
||||
|
||||
def run_82_logic(active_set, save_callback):
    """Crawl the "82" weather site and persist each station's latest data.

    Flow: log in with a cookie-holding session, fetch the station list page,
    extract station ids from ``<option value="...">`` entries, then fetch and
    save the latest reading for every station.

    Args:
        active_set: mutable set; any truthy key returned by ``save_callback``
            is added so the caller can track records touched in this run.
        save_callback: callable ``(site, station_id, status, message,
            latest_time=..., content=...) -> key or None`` that persists one
            station's result.

    A failure on one station is recorded via ``save_callback`` and does not
    abort the remaining stations; a top-level failure is logged and swallowed
    so the scheduler that invokes this crawler keeps running.
    """
    session = requests.Session()
    print(">>> [82爬虫] 启动...")
    try:
        # 1. Log in. Auth appears to be cookie/session based: the response
        # body is never inspected, only the session cookies matter.
        session.post(f"{CONFIG['base_url']}/login.php", data=CONFIG["login"], timeout=10)

        # 2. Fetch the station-list page.
        resp = session.post(f"{CONFIG['base_url']}/GetStationList.php", timeout=10)

        # etree.HTML returns None (rather than raising) on unparseable input.
        html = etree.HTML(resp.content)
        if html is None:
            print(">>> [82爬虫] 解析页面失败")
            return

        # Station ids live in <option value="..."> attributes; skip empties.
        stations = html.xpath('//option/@value')
        for sid in [s for s in stations if s]:
            try:
                # 3. Fetch one station's latest data. The endpoint expects
                # the raw station id as a text/plain request body.
                r = session.post(f"{CONFIG['base_url']}/getLastWeatherData.php", data=str(sid),
                                 headers={'Content-Type': 'text/plain'}, timeout=10)
                # The endpoint may return non-JSON on error pages; treat
                # that the same as an empty payload.
                try:
                    data = r.json()
                except ValueError:
                    data = None

                if data:
                    d_list = data.get('date', [])
                    latest = str(d_list[-1]) if d_list else "N/A"
                    # Persist the full payload; ensure_ascii=False keeps any
                    # Chinese text readable in storage.
                    key = save_callback("82网站", sid, "正常", "同步成功", latest_time=latest,
                                        content=json.dumps(data, ensure_ascii=False))
                    if key: active_set.add(key)
                else:
                    key = save_callback("82网站", sid, "异常", "返回空数据")
                    if key: active_set.add(key)
            except Exception as e:
                # One station failing must not abort the rest. FIX: the
                # original bound the exception as `e` but never used it,
                # silently dropping the failure cause — log it here.
                logging.warning("82 Crawler station %s failed: %s", sid, e)
                key = save_callback("82网站", sid, "异常", "单个采集失败")
                if key: active_set.add(key)
    except Exception as e:
        # Top-level boundary: never propagate into the caller's scheduler.
        logging.error(f"82 Crawler Error: {e}")
|
||||
Reference in New Issue
Block a user