Files
ZDXX/2_3banben/services/crawler_82.py
2026-01-09 13:38:51 +08:00

62 lines
2.0 KiB
Python

# services/crawler_82.py
import requests
import json
import logging
from lxml import etree
from config import Config
from datetime import datetime
# Settings for the "82" crawler; this module reads its 'base_url' and 'login' entries.
CONFIG = Config.CRAWLER_CONFIG["82"]
def _fetch_station_82(session, sid):
    """Fetch the latest weather data for one station and wrap it in a result dict.

    Args:
        session: The logged-in ``requests.Session`` used for the POST request.
        sid: Station identifier taken from the station list page.

    Returns:
        A dict with the keys ``source``, ``name``, ``status``, ``value``,
        ``target_time``, ``raw_json`` and ``temp_file``. ``status`` is
        ``'正常'`` on success and ``'异常'`` when the response is empty,
        is not valid JSON, or the request itself fails. This function never
        raises for request/parse errors; they are logged and reported through
        ``status``/``value`` instead.
    """
    packet = {
        'source': '82网站',
        'name': str(sid),
        'status': '正常',
        'value': '',
        # Placeholder timestamp; replaced by the newest entry of the payload's
        # 'date' list when data is returned.
        'target_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        'raw_json': {},
        'temp_file': None,
    }
    try:
        # The endpoint receives the bare station id as a plain-text body.
        response = session.post(
            f"{CONFIG['base_url']}/getLastWeatherData.php",
            data=str(sid),
            headers={'Content-Type': 'text/plain'},
            timeout=10,
        )
        try:
            data = response.json()
        except ValueError:
            # JSON decode errors raised by requests are ValueError subclasses;
            # a non-JSON body is treated the same as an empty one.
            data = None

        if data:
            d_list = data.get('date', [])
            latest = str(d_list[-1]) if d_list else "N/A"
            packet['target_time'] = latest
            packet['value'] = f"Data Points: {len(d_list)}"
            packet['raw_json'] = data  # keep the complete JSON payload
        else:
            packet['status'] = '异常'
            packet['value'] = "返回空数据"
    except Exception as exc:
        # Best effort: one failing station must not abort the whole run,
        # but the cause is logged instead of being silently dropped.
        logging.warning("82 Crawler station %s failed: %s", sid, exc)
        packet['status'] = '异常'
        packet['value'] = "单个采集失败"
    return packet


def run_82_logic():
    """Log in to the "82" site, query every listed station and return the results.

    Returns:
        A list with one result dict per non-empty station id found in the
        ``<option value=...>`` attributes of the station list page. The list
        is empty when the page cannot be parsed. If login or the station list
        request fails, the error is logged and the results collected so far
        are returned.
    """
    results = []
    print(">>> [82爬虫] 启动...")
    # The context manager closes the session (and its pooled connections)
    # on every exit path, including the early return below.
    with requests.Session() as session:
        try:
            session.post(f"{CONFIG['base_url']}/login.php", data=CONFIG["login"], timeout=10)
            resp = session.post(f"{CONFIG['base_url']}/GetStationList.php", timeout=10)
            html = etree.HTML(resp.content)
            if html is None:
                return []
            stations = html.xpath('//option/@value')
            for sid in stations:
                if not sid:
                    # Skip options with an empty value attribute.
                    continue
                results.append(_fetch_station_82(session, sid))
        except Exception as e:
            logging.error(f"82 Crawler Error: {e}")
    return results