62 lines
2.0 KiB
Python
62 lines
2.0 KiB
Python
# services/crawler_82.py
|
|
import requests
|
|
import json
|
|
import logging
|
|
from lxml import etree
|
|
from config import Config
|
|
from datetime import datetime
|
|
|
|
# Site-specific settings for crawler "82", loaded from the shared project Config.
# Presumably a dict containing at least 'base_url' and a 'login' form payload
# (both are read by run_82_logic below) — confirm against config.Config.
CONFIG = Config.CRAWLER_CONFIG["82"]
|
|
|
|
|
|
def run_82_logic():
    """Crawl site "82": log in, enumerate stations, fetch each station's latest data.

    Flow: POST credentials to ``login.php`` (session cookie is kept by the
    ``requests.Session``), POST ``GetStationList.php`` and scrape station ids
    from ``<option value="...">`` entries, then POST each id to
    ``getLastWeatherData.php`` as a ``text/plain`` body.

    Returns:
        list[dict]: one packet per station with keys ``source``, ``name``,
        ``status``, ``value``, ``target_time``, ``raw_json`` and ``temp_file``.
        An empty list is returned when login or the station listing fails.
    """
    results = []
    # Use logging (consistent with the rest of the module) instead of print().
    logging.info(">>> [82爬虫] 启动...")
    session = requests.Session()

    try:
        # Fail fast on an HTTP-level error instead of letting an error page
        # flow silently into the parsing steps below.
        login_resp = session.post(
            f"{CONFIG['base_url']}/login.php", data=CONFIG["login"], timeout=10
        )
        login_resp.raise_for_status()

        resp = session.post(f"{CONFIG['base_url']}/GetStationList.php", timeout=10)
        resp.raise_for_status()

        html = etree.HTML(resp.content)
        if html is None:
            return []

        # Station ids come from <option value="..."> nodes; skip empty values.
        stations = html.xpath('//option/@value')
        for sid in (s for s in stations if s):
            data_packet = {
                'source': '82网站',
                'name': str(sid),
                'status': '正常',
                'value': '',
                # Default timestamp; overwritten below when real data arrives.
                'target_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'raw_json': {},
                'temp_file': None,
            }
            try:
                # The endpoint expects the bare station id as a text/plain body.
                r = session.post(
                    f"{CONFIG['base_url']}/getLastWeatherData.php",
                    data=str(sid),
                    headers={'Content-Type': 'text/plain'},
                    timeout=10,
                )
                try:
                    data = r.json()
                except ValueError:
                    # Narrowed from a bare `except:` — requests raises a
                    # ValueError subclass when the body is not valid JSON.
                    data = None

                if data:
                    d_list = data.get('date', [])
                    latest = str(d_list[-1]) if d_list else "N/A"
                    data_packet['target_time'] = latest
                    data_packet['value'] = f"Data Points: {len(d_list)}"
                    data_packet['raw_json'] = data  # 🔥 存完整JSON
                else:
                    data_packet['status'] = '异常'
                    data_packet['value'] = "返回空数据"

            except Exception:
                # A single station failure must not abort the whole run;
                # log the traceback (the original discarded it) and continue.
                logging.exception("82 station %s collection failed", sid)
                data_packet['status'] = '异常'
                data_packet['value'] = "单个采集失败"

            results.append(data_packet)

    except Exception as e:
        # Top-level boundary: login / listing failure — log and return
        # whatever was collected (usually an empty list).
        logging.error("82 Crawler Error: %s", e)

    return results