import requests
import json
import logging
from lxml import etree
from config import Config

# Site-specific settings (base URL, login form data) for the "82" crawler.
CONFIG = Config.CRAWLER_CONFIG["82"]


def run_82_logic(active_set, save_callback):
    """Crawl the "82" website and persist per-station weather data.

    Flow: log in, scrape the station list, then fetch each station's latest
    weather data and hand the result to *save_callback*.

    Args:
        active_set: Mutable set; any truthy key returned by *save_callback*
            is added so the caller can track records touched this run.
        save_callback: Callable like
            ``save_callback(site, station_id, status, message,
            latest_time=..., content=...)`` returning a key or a falsy value.

    A failure on one station is logged and recorded but does not abort the
    run; any top-level failure (login, list fetch) is logged and the
    function returns normally.
    """
    session = requests.Session()
    print(">>> [82爬虫] 启动...")
    try:
        # 1. Log in; the session cookie is retained on the Session object.
        session.post(
            f"{CONFIG['base_url']}/login.php",
            data=CONFIG["login"],
            timeout=10,
        )

        # 2. Fetch the station list page; station ids live in the `value`
        #    attribute of <option> elements.
        resp = session.post(
            f"{CONFIG['base_url']}/GetStationList.php", timeout=10
        )
        html = etree.HTML(resp.content)
        if html is None:
            print(">>> [82爬虫] 解析页面失败")
            return

        stations = html.xpath('//option/@value')
        for sid in [s for s in stations if s]:
            try:
                _collect_station(session, sid, active_set, save_callback)
            except Exception as e:
                # A single station failing must not stop the whole run,
                # but the cause is logged rather than silently discarded
                # (the original dropped `e` on the floor).
                logging.warning("82 Crawler: station %s failed: %s", sid, e)
                key = save_callback("82网站", sid, "异常", "单个采集失败")
                if key:
                    active_set.add(key)
    except Exception as e:
        logging.error(f"82 Crawler Error: {e}")


def _collect_station(session, sid, active_set, save_callback):
    """Fetch the latest weather data for one station and save the outcome.

    Raises on network errors; the caller treats any exception as a
    per-station failure.
    """
    # 3. The endpoint expects the raw station id as a text/plain body.
    r = session.post(
        f"{CONFIG['base_url']}/getLastWeatherData.php",
        data=str(sid),
        headers={'Content-Type': 'text/plain'},
        timeout=10,
    )

    # The endpoint sometimes returns a non-JSON body; treat that as empty
    # data rather than an error.
    try:
        data = r.json()
    except ValueError:
        data = None

    if data:
        # Use the last entry of the 'date' list as the latest timestamp;
        # fall back to "N/A" when the list is missing or empty.
        d_list = data.get('date', [])
        latest = str(d_list[-1]) if d_list else "N/A"
        key = save_callback(
            "82网站", sid, "正常", "同步成功",
            latest_time=latest,
            content=json.dumps(data, ensure_ascii=False),
        )
    else:
        key = save_callback("82网站", sid, "异常", "返回空数据")
    if key:
        active_set.add(key)