"""Download the most recent weather record of every station and save it as JSON.

The script logs in to the weather server, reads the station list, asks the
server for each station's data, keeps the single record whose date is closest
to today, and writes one ``<station>.json`` file per station into OUTPUT_DIR.
"""
import json
import os
import re
import time
from datetime import datetime

import requests
from lxml import etree

# --- Configuration ---
# NOTE(review): the credentials below are hard-coded and are sent over plain
# HTTP. Flagged only; consider loading them from the environment instead.
BASE_URL = "http://82.156.1.111/weather/php"
LOGIN_DATA = {'username': 'renlixin', 'password': 'licahk', 'login': '123'}
OUTPUT_DIR = "weather_data_test"
# Seconds to wait on any single HTTP request; without a timeout one stalled
# connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 15

os.makedirs(OUTPUT_DIR, exist_ok=True)
session = requests.Session()


def fetch_stations():
    """Return the station identifiers listed by ``GetStationList.php``.

    Returns:
        The non-blank ``value`` attributes of every ``<option>`` element in
        the response, in document order. An empty list is returned when the
        server does not answer with HTTP 200 or the parser yields no document.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    resp = session.post(f"{BASE_URL}/GetStationList.php", timeout=REQUEST_TIMEOUT)
    if resp.status_code != 200:
        return []
    tree = etree.HTML(resp.content)
    if tree is None:
        # The HTML parser can hand back None for a body without any markup.
        return []
    return [s for s in tree.xpath('//option/@value') if s and str(s).strip()]


def get_latest_data_item(station_id, data_list):
    """Pick the entry of ``data_list`` whose date is closest to today.

    Each entry is either a string starting with a ``YYYY-MM-DD`` date or a
    mapping whose ``'date'`` value is such a string; anything after the first
    whitespace (for example a time of day) is ignored. Entries that cannot be
    parsed are skipped.

    Args:
        station_id: Unused; kept so existing callers keep working.
        data_list: Candidate records. Anything that is not a non-empty list
            yields ``None``.

    Returns:
        ``None`` when no entry has a parsable date, otherwise a dict with
        ``'diff'`` (absolute distance from today in days), ``'date_obj'``
        (the parsed ``datetime.date``) and ``'original_data'`` (the entry
        exactly as received). On ties the earliest entry in the list wins.
    """
    if not data_list or not isinstance(data_list, list):
        return None

    today = datetime.now().date()
    candidates = []
    for item in data_list:
        try:
            # Plain strings are parsed directly; mappings supply the 'date' key.
            raw = item if isinstance(item, str) else item.get('date', '')
            record_date = datetime.strptime(raw.split()[0], "%Y-%m-%d").date()
        except (AttributeError, IndexError, TypeError, ValueError):
            # Missing/blank date, wrong type, or wrong format: skip the entry.
            continue
        candidates.append({
            'diff': abs((today - record_date).days),
            'date_obj': record_date,
            'original_data': item,
        })

    if not candidates:
        return None
    # min() returns the first minimal element, matching a stable sort's head.
    return min(candidates, key=lambda entry: entry['diff'])


def _safe_filename(name):
    """Replace path separators and reserved characters in ``name`` with ``_``."""
    return re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', str(name))


def save_json(name, data):
    """Write ``data`` as pretty-printed UTF-8 JSON to ``OUTPUT_DIR/<name>.json``.

    ``name`` comes from the remote station list, so characters that could
    escape OUTPUT_DIR or are invalid in file names are replaced first.
    """
    path = os.path.join(OUTPUT_DIR, f"{_safe_filename(name)}.json")
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)


def _process_station(sid):
    """Fetch one station's data, keep its most recent record and save it."""
    # NOTE(review): a str body is encoded by the HTTP layer; station ids with
    # non-Latin-1 characters may need an explicit .encode('utf-8') -- confirm.
    resp = session.post(
        f"{BASE_URL}/getLastWeatherData.php",
        data=str(sid),
        headers={'Content-Type': 'text/plain'},
        timeout=REQUEST_TIMEOUT,
    )
    if resp.status_code != 200:
        print(f" ❌ 请求失败: {resp.status_code}")
        return

    full_data = resp.json()
    # Assumes the JSON object holds the record list under 'date' (or 'items');
    # adjust to the real key name if the API differs.
    data_key = 'date' if 'date' in full_data else 'items'
    target_list = full_data.get(data_key, [])

    latest_result = get_latest_data_item(sid, target_list)
    if not latest_result:
        print(f" ⚪ 站点 {sid} 未找到有效日期数据")
        return

    # Rebuild the payload so only the record closest to today is stored.
    final_payload = {
        "proxy_name": sid,
        "status": "online",
        "latest_date": str(latest_result['date_obj']),
        "days_diff": latest_result['diff'],
        "data_content": latest_result['original_data'],
    }

    # Warn when the newest record is not from today.
    if latest_result['diff'] != 0:
        print(f" ⚠️ 非当天数据: {latest_result['date_obj']} (差 {latest_result['diff']} 天)")
    else:
        print(f" ✨ 数据已同步至今天")

    save_json(sid, final_payload)


def main():
    """Log in, enumerate the stations and save each one's latest record."""
    print("正在登录...")
    try:
        login_resp = session.post(
            f"{BASE_URL}/login.php", data=LOGIN_DATA, timeout=REQUEST_TIMEOUT
        )
        if login_resp.status_code != 200:
            print(f"⚠️ 登录请求返回状态码: {login_resp.status_code}")
        stations = fetch_stations()
    except requests.RequestException as e:
        # Without a session or a station list there is nothing left to do.
        print(f"❌ 登录或获取站点列表失败: {e}")
        return

    print(f"成功获取 {len(stations)} 个有效站点")

    total = len(stations)
    for i, sid in enumerate(stations, 1):
        print(f"[{i}/{total}] 处理站点: {sid}")
        try:
            _process_station(sid)
        except Exception as e:
            # Per-station boundary: report the failure and move on.
            print(f" ❌ 错误: {e}")
        # Small pause between stations to avoid hammering the server.
        time.sleep(0.3)


if __name__ == "__main__":
    main()