Fix data fetching to ensure the full JSON payload is captured

YueL1331
2026-01-08 14:26:34 +08:00
parent a5b0b71d26
commit a8984a156c
9 changed files with 304 additions and 479 deletions


@@ -1,56 +1,62 @@
 # services/crawler_82.py
 import requests
 import json
 import logging
 from lxml import etree
 from config import Config
 from datetime import datetime
 
 # Load configuration
 CONFIG = Config.CRAWLER_CONFIG["82"]
 
-def run_82_logic(active_set, save_callback):
-    session = requests.Session()
+def run_82_logic():
+    """Return result_list"""
+    results = []
+    print(">>> [82爬虫] 启动...")
+    session = requests.Session()
     try:
         # 1. Log in
         session.post(f"{CONFIG['base_url']}/login.php", data=CONFIG["login"], timeout=10)
         # 2. Fetch the station list
         resp = session.post(f"{CONFIG['base_url']}/GetStationList.php", timeout=10)
         # Parse with lxml
         html = etree.HTML(resp.content)
-        if html is None:
-            print(">>> [82爬虫] 解析页面失败")
-            return
+        if html is None: return []
         stations = html.xpath('//option/@value')
         for sid in [s for s in stations if s]:
+            data_packet = {
+                'source': '82网站',
+                'name': str(sid),
+                'status': '正常',
+                'value': '',
+                'target_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                'raw_json': {},
+                'temp_file': None
+            }
             try:
                 # 3. Fetch the data for a single station
                 r = session.post(f"{CONFIG['base_url']}/getLastWeatherData.php", data=str(sid),
                                  headers={'Content-Type': 'text/plain'}, timeout=10)
                 # Try to parse the response as JSON
                 try:
                     data = r.json()
-                except ValueError:
+                except Exception:
                     data = None
                 if data:
                     d_list = data.get('date', [])
                     latest = str(d_list[-1]) if d_list else "N/A"
-                    # Save the data
-                    key = save_callback("82网站", sid, "正常", "同步成功", latest_time=latest,
-                                        content=json.dumps(data, ensure_ascii=False))
-                    if key: active_set.add(key)
+                    data_packet['target_time'] = latest
+                    data_packet['value'] = f"Data Points: {len(d_list)}"
+                    data_packet['raw_json'] = data  # 🔥 keep the complete JSON payload
                 else:
-                    key = save_callback("82网站", sid, "异常", "返回空数据")
-                    if key: active_set.add(key)
+                    data_packet['status'] = '异常'
+                    data_packet['value'] = "返回空数据"
             except Exception as e:
                 # A single station failing must not abort the whole run
-                key = save_callback("82网站", sid, "异常", "单个采集失败")
-                if key: active_set.add(key)
+                data_packet['status'] = '异常'
+                data_packet['value'] = "单个采集失败"
+            results.append(data_packet)
     except Exception as e:
         logging.error(f"82 Crawler Error: {e}")
+    return results
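
With this change, run_82_logic() no longer receives active_set and save_callback: it assembles one data_packet per station, keeps the complete JSON response in raw_json, and returns the whole list, so persistence moves to the caller. A minimal sketch of such a caller, assuming the module path services/crawler_82.py shown in the diff; store_result and sync_82 are hypothetical names, not part of this commit:

# Hypothetical consumer of run_82_logic()'s new return value.
import json
from services.crawler_82 import run_82_logic

def store_result(packet: dict, content: str) -> None:
    # Stand-in for real persistence (DB insert, file write, ...),
    # roughly where the removed save_callback used to be invoked.
    print(packet['source'], packet['name'], packet['status'], packet['target_time'])

def sync_82() -> None:
    for packet in run_82_logic():
        # raw_json is {} when the station returned nothing or the request failed
        content = json.dumps(packet['raw_json'], ensure_ascii=False)
        store_result(packet, content)

if __name__ == "__main__":
    sync_82()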