"""Freshness check for two remote data sources, with an Excel error report.

The script polls the FRP proxy list configured under ``CONFIG["106"]`` and,
for every online tower station, downloads the most recent file from its
per-day data folder. It then polls every weather station exposed by the
service configured under ``CONFIG["82"]``. Every problem found along the way
is collected in ``error_logs`` and finally exported to ``EXCEL_PATH``.
"""

import json
import os
import time
from datetime import datetime

import pandas as pd
import requests
from lxml import etree

# --- Basic configuration ---
DATA_ROOT = "data"
FRPS_DIR = os.path.join(DATA_ROOT, "frps_106")
WEATHER_DIR = os.path.join(DATA_ROOT, "weather_82")
EXCEL_PATH = os.path.join(DATA_ROOT, "error_report.xlsx")

for d in [FRPS_DIR, WEATHER_DIR]:
    os.makedirs(d, exist_ok=True)

# NOTE(review): credentials are embedded in source control here. Consider
# loading them from environment variables or a secrets store instead.
CONFIG = {
    "106": {
        "base_url": "http://106.75.72.40:7500/api/proxy/tcp",
        "primary_auth": "Basic YWRtaW46bGljYWhr",
        "login_payload": {"username": "admin", "password": "licahk", "recaptcha": ""},
    },
    "82": {
        "base_url": "http://82.156.1.111/weather/php",
        "login": {'username': 'renlixin', 'password': 'licahk', 'login': '123'},
    },
}

# Accumulates one dict per detected problem; consumed by export_to_excel().
error_logs = []


# --- Shared helpers ---
def add_error(source, name, reason, latest_time="N/A"):
    """Append one error record to ``error_logs``.

    Args:
        source: Label of the data source the error belongs to.
        name: Station or proxy name.
        reason: Human-readable description of the problem.
        latest_time: Timestamp of the newest data seen, either
            ``YYYY-MM-DD[ ...]`` or ``YYYY_MM_DD``; ``"N/A"`` when unknown.

    The record also carries a day-lag column derived from ``latest_time``
    relative to today's local date.
    """
    days_diff = "N/A"
    if latest_time and latest_time != "N/A":
        try:
            # Keep only the date part and normalise "2024_01_31" to "2024-01-31".
            clean_date_str = str(latest_time).split()[0].replace('_', '-')
            target_date = datetime.strptime(clean_date_str, "%Y-%m-%d").date()
            diff = (datetime.now().date() - target_date).days
            days_diff = f"滞后 {diff} 天" if diff > 0 else "当天已同步"
        except (ValueError, IndexError):
            # ValueError: not a date; IndexError: whitespace-only string.
            days_diff = "解析失败"

    error_logs.append({
        "数据来源": source,
        "站点/代理名称": name,
        "错误原因": reason,
        "日期偏移量": days_diff,
        "最新数据时间": latest_time,
        "检查时间": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    })


def find_closest_item(items, is_date_level=True):
    """Return the listing entry whose timestamp is closest to now.

    Args:
        items: List of dicts from a directory listing. Entries that are not
            dicts or whose timestamp cannot be parsed are skipped.
        is_date_level: When true, the entry name (or the last path segment)
            is parsed as ``YYYY_MM_DD``. Otherwise the entry's ``modified``
            field is parsed as an ISO-8601 timestamp.

    Returns:
        A ``(seconds_from_now, item, name)`` tuple for the best entry, or
        ``None`` when nothing usable was found. Ties keep the first entry.
    """
    if not items or not isinstance(items, list):
        return None

    now = datetime.now()
    scored_items = []
    for item in items:
        if not isinstance(item, dict):
            continue
        name_val = item.get('name') or ''
        path_val = item.get('path') or ''
        target_str = name_val or str(path_val).split('/')[-1]
        try:
            if is_date_level:
                current_date = datetime.strptime(target_str, "%Y_%m_%d")
            else:
                mod_str = item.get('modified', '')
                if not mod_str:
                    continue
                current_date = datetime.fromisoformat(mod_str.replace('Z', '+00:00'))
                if current_date.tzinfo is not None:
                    # Convert to local wall-clock time before dropping tzinfo,
                    # so the comparison with the naive local "now" is valid.
                    current_date = current_date.astimezone().replace(tzinfo=None)
            diff = abs((now - current_date).total_seconds())
        except (ValueError, TypeError, AttributeError, OverflowError, OSError):
            # Unparseable or wrongly typed timestamp: ignore this entry.
            continue
        scored_items.append((diff, item, target_str))

    if not scored_items:
        return None
    return min(scored_items, key=lambda scored: scored[0])


def process_text_content(raw_content):
    """Re-join wrapped lines of a text payload.

    A line containing a space is treated as a continuation and appended
    (stripped) to the current record. A line without a space closes the
    current record and starts a new one. Empty records are dropped.

    Returns:
        The records joined with newlines, or ``""`` for empty input.
    """
    if not raw_content:
        return ""

    result, current = [], ""
    for line in str(raw_content).split('\n'):
        if " " in line:
            current += line.strip()
        else:
            if current:
                result.append(current)
            current = line.strip()
    if current:
        result.append(current)
    return "\n".join(result)


def save_json(folder, name, data):
    """Write ``data`` as pretty-printed UTF-8 JSON to ``<folder>/<name>.json``."""
    path = os.path.join(folder, f"{name}.json")
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4, ensure_ascii=False)


# --- Site 106 logic ---
def get_106_dynamic_token(port):
    """Log in to the station service on ``port`` and return its auth token.

    Returns:
        The response body with surrounding whitespace and double quotes
        removed, or ``None`` when the request fails or the status is not 200.
    """
    url = f"http://106.75.72.40:{port}/api/login"
    try:
        resp = requests.post(url, json=CONFIG["106"]["login_payload"], timeout=10)
    except requests.RequestException:
        # Best effort: the caller reports the missing token as an error.
        return None
    if resp.status_code == 200:
        return resp.text.strip().replace('"', '')
    return None


def _process_106_station(name, item, is_tower_underscore, auth, today_str):
    """Download the newest data file of one online tower station.

    Args:
        name: Proxy name, used for report rows and output file names.
        item: Proxy record from the main API; ``conf.remote_port`` is read.
        is_tower_underscore: True for ``TOWER_`` stations (``/Data/`` root,
            JSON download). False for the other tower stations (``/data/``
            root, raw binary download).
        auth: Value of the ``Authorization`` header.
        today_str: Today's date as ``YYYY_MM_DD``.

    Problems are recorded through ``add_error``. Exceptions propagate to the
    caller, which records them as a station crash.
    """
    conf = item.get('conf') or {}
    port = conf.get('remote_port')
    if not port:
        add_error("106网站", name, "配置错误: 缺少 remote_port")
        return

    # A per-station token is requested only for stations that are online.
    token = get_106_dynamic_token(port)
    if not token:
        add_error("106网站", name, "Token获取失败(登录异常)")
        return

    host = f"http://106.75.72.40:{port}"
    headers = {"Authorization": auth, "x-auth": token, "User-Agent": "Mozilla/5.0"}

    # The data root differs in letter case between the two station types.
    data_dir = "/Data/" if is_tower_underscore else "/data/"
    api_root = f"/api/resources{data_dir}"

    res2 = requests.get(f"{host}{api_root}", headers=headers, timeout=10)
    if res2.status_code != 200:
        add_error("106网站", name, f"无法打开Data目录 (HTTP {res2.status_code})")
        return

    best_date = find_closest_item(res2.json().get('items', []), is_date_level=True)
    if not best_date:
        add_error("106网站", name, "未找到今日文件夹", "N/A")
        return
    date_name = best_date[2]
    if date_name != today_str:
        # Report the stale folder, but still fetch its newest file.
        add_error("106网站", name, "未找到今日文件夹", date_name)

    # Find the most recent file inside the chosen date folder.
    res3 = requests.get(f"{host}{api_root}{date_name}/", headers=headers, timeout=10)
    if res3.status_code != 200:
        add_error("106网站", name, f"无法打开日期目录 (HTTP {res3.status_code})", date_name)
        return
    best_file = find_closest_item(res3.json().get('items', []), is_date_level=False)
    if not best_file:
        add_error("106网站", name, "文件夹内无文件", date_name)
        return

    file_item = best_file[1]
    # The fallback must be relative to the server root and must not repeat
    # the "/api/resources" prefix, because that prefix is added again below.
    full_path = file_item.get('path') or f"{data_dir}{date_name}/{file_item.get('name')}"

    if not is_tower_underscore:
        # Raw endpoint: store the file bytes unchanged.
        res4 = requests.get(f"{host}/api/raw{full_path}", headers=headers, timeout=20)
        if res4.status_code != 200:
            add_error("106网站", name, f"文件下载失败 (HTTP {res4.status_code})", date_name)
            return
        save_path = os.path.join(FRPS_DIR, f"{name}_{today_str}.bin")
        with open(save_path, 'wb') as f:
            f.write(res4.content)
        print(f" ✅ {name} 二进制数据保存成功")
        return

    # Resources endpoint: the file text is in the "content" field.
    res4 = requests.get(f"{host}/api/resources{full_path}", headers=headers, timeout=20)
    if res4.status_code != 200:
        add_error("106网站", name, f"文件下载失败 (HTTP {res4.status_code})", date_name)
        return
    file_json = res4.json()
    raw_content = file_json.get('content', '') if file_json else None
    if not raw_content:
        add_error("106网站", name, "文件内容为空", date_name)
        return
    save_path = os.path.join(FRPS_DIR, f"{name}_{today_str}.json")
    with open(save_path, 'w', encoding='utf-8') as f:
        f.write(process_text_content(raw_content))
    print(f" ✅ {name} JSON数据保存成功")


def run_106_logic():
    """Check every ``*_data`` proxy of site 106 and fetch its newest file.

    Offline proxies are reported and their proxy record is saved as JSON.
    Online proxies whose name contains ``TOWER`` are handed to
    ``_process_106_station``. A failure in one station is recorded and does
    not stop the remaining stations.
    """
    print("\n>>> 开始处理 106 网站 (FRPS - 修正逻辑)...")
    c = CONFIG["106"]
    today_str = datetime.now().strftime("%Y_%m_%d")

    try:
        main_headers = {"Authorization": c["primary_auth"], "User-Agent": "Mozilla/5.0"}
        resp = requests.get(c["base_url"], headers=main_headers, timeout=15)
        if resp.status_code != 200:
            add_error("106网站", "主入口API", f"访问失败: HTTP {resp.status_code}")
            return

        for item in resp.json().get('proxies', []):
            if not isinstance(item, dict):
                continue
            # Coerce to str so a null name cannot abort the whole loop.
            name = str(item.get('name') or 'Unknown')
            if not name.lower().endswith('_data'):
                continue

            # 1. Status pre-check: offline devices are reported, their record
            #    is saved, and no further API calls are made for them.
            status_raw = item.get('status', '')
            status = str(status_raw).lower().strip() if status_raw else "unknown"
            if status != 'online':
                add_error("106网站", name, f"设备离线 (当前状态: {status})")
                save_json(FRPS_DIR, name, item)
                continue

            # 2. Classify the station type; proxies that are not towers are skipped.
            name_up = name.upper()
            if "TOWER" not in name_up:
                continue
            is_tower_underscore = "TOWER_" in name_up

            # 3. Fetch the newest file; isolate failures per station.
            try:
                _process_106_station(
                    name, item, is_tower_underscore, c["primary_auth"], today_str
                )
            except Exception as e:
                add_error("106网站", name, f"站点处理崩溃: {str(e)}")
    except Exception as e:
        add_error("106网站", "全局逻辑", f"主进程崩溃: {str(e)}")


# --- Site 82 logic ---
def run_82_logic():
    """Check the latest weather record of every station of site 82.

    Logs in, reads the station ids from the ``<option>`` values of the
    station list page, then requests each station's last data set. Stations
    whose newest date is not today, or that return nothing usable, are
    reported. Returned data sets are saved as JSON in ``WEATHER_DIR``.
    """
    print("\n>>> 开始处理 82 网站 (Weather)...")
    c = CONFIG["82"]
    today_fmt = datetime.now().strftime("%Y-%m-%d")

    # The context manager releases pooled connections on every exit path.
    with requests.Session() as session:
        try:
            session.post(f"{c['base_url']}/login.php", data=c["login"], timeout=10)
            resp = session.post(f"{c['base_url']}/GetStationList.php", timeout=10)
            if resp.status_code != 200:
                add_error("82网站", "登录模块", f"无法获取列表: HTTP {resp.status_code}")
                return

            stations = etree.HTML(resp.content).xpath('//option/@value')
            stations = [s for s in stations if s and str(s).strip()]

            for sid in stations:
                try:
                    r = session.post(
                        f"{c['base_url']}/getLastWeatherData.php",
                        data=str(sid),
                        headers={'Content-Type': 'text/plain'},
                        timeout=10,
                    )
                    if r.status_code != 200:
                        add_error("82网站", sid, f"请求失败: HTTP {r.status_code}")
                        continue

                    data = r.json()
                    if data is None:
                        add_error("82网站", sid, "返回 Null 数据")
                        continue

                    d_list = data.get('date', [])
                    if not d_list:
                        add_error("82网站", sid, "返回结果中无日期列表")
                    else:
                        # The last entry of the date list is the newest record.
                        latest = str(d_list[-1])
                        if latest.split()[0] != today_fmt:
                            add_error("82网站", sid, "数据非当天更新", latest)

                    save_json(WEATHER_DIR, sid, data)
                    time.sleep(0.1)  # brief pause between station requests
                except Exception as e:
                    add_error("82网站", sid, f"数据解析异常: {str(e)}")
        except Exception as e:
            add_error("82网站", "初始化模块", str(e))


# --- Summary export ---
def export_to_excel():
    """Write all collected errors to ``EXCEL_PATH``; do nothing when there are none."""
    if not error_logs:
        print("\n[✔] 未发现错误记录。")
        return

    df = pd.DataFrame(error_logs)
    cols = ["数据来源", "站点/代理名称", "错误原因", "日期偏移量", "最新数据时间", "检查时间"]
    df = df[[c for c in cols if c in df.columns]]
    df.to_excel(EXCEL_PATH, index=False)
    print(f"\n[!] 错误报表已生成至: {EXCEL_PATH} (共 {len(error_logs)} 条)")


def main():
    """Run both site checks, then export the error report."""
    run_106_logic()
    run_82_logic()
    export_to_excel()
    print("\n任务全部完成。")


if __name__ == "__main__":
    main()