import os
import json
import time
import requests
import pandas as pd
from lxml import etree
from datetime import datetime

# --- Base configuration ---
DATA_ROOT = "data"
FRPS_DIR = os.path.join(DATA_ROOT, "frps_106")
WEATHER_DIR = os.path.join(DATA_ROOT, "weather_82")
EXCEL_PATH = os.path.join(DATA_ROOT, "error_report.xlsx")

for d in [FRPS_DIR, WEATHER_DIR]:
    os.makedirs(d, exist_ok=True)

# Per-host access configuration. NOTE(review): credentials/tokens are
# hard-coded here — consider moving them to environment variables.
CONFIG = {
    "106": {
        "base_url": "http://106.75.72.40:7500/api/proxy/tcp",
        "primary_auth": "Basic YWRtaW46bGljYWhr",
        "x_auth": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyIjp7ImlkIjoxLCJsb2NhbGUiOiJ6aC1jbiIsInZpZXdNb2RlIjoibGlzdCIsInNpbmdsZUNsaWNrIjpmYWxzZSwicGVybSI6eyJhZG1pbiI6dHJ1ZSwiZXhlY3V0ZSI6dHJ1ZSwiY3JlYXRlIjp0cnVlLCJyZW5hbWUiOnRydWUsIm1vZGlmeSI6dHJ1ZSwiZGVsZXRlIjp0cnVlLCJzaGFyZSI6dHJ1ZSwiZG93bmxvYWQiOnRydWV9LCJjb21tYW5kcyI6W10sImxvY2tQYXNzd29yZCI6ZmFsc2UsImhpZGVEb3RmaWxlcyI6ZmFsc2V9LCJleHAiOjE3Njc2Njg3NzgsImlhdCI6MTc2NzY2MTU3OCwiaXNzIjoiRmlsZSBCcm93c2VyIn0.z9zycFSf3XpUDRhGjziUJ-PUeHIsRba23AI6itqXM-w"
    },
    "82": {
        "base_url": "http://82.156.1.111/weather/php",
        "login": {'username': 'renlixin', 'password': 'licahk', 'login': '123'}
    }
}

# Accumulated error records; exported to Excel at the end of the run.
error_logs = []


# --- Generic helpers ---

def add_error(source, name, reason, latest_time="N/A"):
    """Append one error record and annotate it with the day lag vs. today.

    Args:
        source: Data source label (e.g. "106网站" / "82网站").
        name: Station / proxy name the error belongs to.
        reason: Human-readable error description.
        latest_time: Latest data timestamp seen, "N/A" if unknown. Accepts
            "2026_01_06" or "2026-01-06"; an optional time part after a
            space is ignored.
    """
    days_diff = "N/A"
    if latest_time and latest_time != "N/A":
        try:
            # Normalize both 2026_01_06 and 2026-01-06 to ISO dashes.
            clean_date_str = str(latest_time).split()[0].replace('_', '-')
            target_date = datetime.strptime(clean_date_str, "%Y-%m-%d").date()
            today_date = datetime.now().date()
            diff = (today_date - target_date).days
            days_diff = f"滞后 {diff} 天" if diff > 0 else "当天已同步"
        except (ValueError, IndexError):
            # ValueError: unparseable date; IndexError: whitespace-only input.
            days_diff = "解析失败"

    error_logs.append({
        "数据来源": source,
        "站点/代理名称": name,
        "错误原因": reason,
        "日期偏移量": days_diff,
        "最新数据时间": latest_time,
        "检查时间": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    })


def find_closest_item(items, is_date_level=True):
    """Return the item whose date is closest to now, or None if none parse.

    Args:
        items: List of dicts, each expected to carry a 'path' key (and a
            'modified' ISO timestamp when is_date_level is False).
        is_date_level: When True, parse the last path segment as
            "YYYY_MM_DD"; when False, parse the 'modified' field instead.
    """
    if not items or not isinstance(items, list):
        return None
    today = datetime.now()
    scored_items = []
    for item in items:
        if not isinstance(item, dict):
            continue
        path = item.get('path', '')
        if not path:
            continue
        try:
            if is_date_level:
                date_str = path.split('/')[-1]
                current_date = datetime.strptime(date_str, "%Y_%m_%d")
            else:
                mod_str = item.get('modified', '')
                if not mod_str:
                    continue
                current_date = datetime.fromisoformat(mod_str.replace('Z', '+00:00'))
            # Strip tzinfo so aware 'modified' stamps compare against the
            # naive local clock without raising.
            diff = abs((today - current_date.replace(tzinfo=None)).total_seconds())
            scored_items.append((diff, item))
        except ValueError:
            # Malformed date / timestamp — skip this entry.
            continue

    if not scored_items:
        return None
    # Sort on the time delta only; comparing the dicts would raise TypeError.
    scored_items.sort(key=lambda x: x[0])
    return scored_items[0][1]


def process_text_content(raw_content):
    """Re-join wrapped lines: a line containing a space is treated as a
    continuation of the current record; others start a new record."""
    if not raw_content:
        return ""
    lines = str(raw_content).split('\n')
    result, current = [], ""
    for line in lines:
        if " " in line:
            current += line.strip()
        else:
            if current:
                result.append(current)
            current = line.strip()
    if current:
        result.append(current)
    return "\n".join(result)


def save_json(folder, name, data):
    """Write *data* as pretty-printed UTF-8 JSON to <folder>/<name>.json."""
    path = os.path.join(folder, f"{name}.json")
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4, ensure_ascii=False)


# --- Host 106 logic (hardened) ---

def run_106_logic():
    """Walk every '*_data' FRPS proxy on host 106, fetch its latest data
    file, and log any offline / stale / malformed station into error_logs."""
    print("\n>>> 开始处理 106 网站 (FRPS)...")
    c = CONFIG["106"]
    headers = {"Authorization": c["primary_auth"], "x-auth": c["x_auth"], "User-Agent": "Mozilla/5.0"}
    today_str = datetime.now().strftime("%Y_%m_%d")

    try:
        resp = requests.get(c["base_url"], headers=headers, timeout=15)
        if resp.status_code != 200:
            add_error("106网站", "主入口API", f"访问失败: HTTP {resp.status_code}")
            return

        proxies = resp.json().get('proxies', [])
        for item in proxies:
            if not isinstance(item, dict):
                continue
            name = item.get('name', 'Unknown')
            if not name.endswith('_data'):
                continue

            try:
                # 1. Status pre-check — guard against a None 'status' field.
                status_raw = item.get('status', '')
                status = str(status_raw).lower().strip() if status_raw else "unknown"

                conf = item.get('conf') or {}
                port = conf.get('remote_port')

                # Offline devices are recorded and skipped; never hit the
                # second-level API for them.
                if status != 'online':
                    add_error("106网站", name, f"设备离线 (当前状态: {status})")
                    save_json(FRPS_DIR, name, item)
                    continue

                if not port:
                    add_error("106网站", name, "配置错误: 缺少 remote_port")
                    continue

                # 2. Only online proxies get the second-level request.
                res2 = requests.get(f"http://106.75.72.40:{port}/api/resources/Data/", headers=headers, timeout=10)
                if res2.status_code != 200:
                    add_error("106网站", name, f"无法打开Data目录 (HTTP {res2.status_code})")
                    continue

                it2 = res2.json().get('items', [])
                closest_date = find_closest_item(it2, True)
                if not closest_date:
                    add_error("106网站", name, "Data目录为空")
                    continue

                path_date = closest_date.get('path', '')
                date_val = path_date.split('/')[-1]

                # Record a stale date but still try to fetch the content.
                if date_val != today_str:
                    add_error("106网站", name, "日期非当天", date_val)

                # 3. List the files inside the date folder. Check the status
                # code here too — mirrors the res2 handling above, otherwise
                # an error page would surface as a misleading JSON crash.
                res3 = requests.get(f"http://106.75.72.40:{port}/api/resources{path_date}/", headers=headers,
                                    timeout=10)
                if res3.status_code != 200:
                    add_error("106网站", name, f"无法打开日期目录 (HTTP {res3.status_code})", date_val)
                    continue
                it3 = res3.json().get('items', [])
                closest_file = find_closest_item(it3, False)
                if not closest_file:
                    add_error("106网站", name, "文件夹内无文件", date_val)
                    continue

                # 4. Fetch the file content, defending against null payloads.
                path_csv = closest_file.get('path', '')
                res4 = requests.get(f"http://106.75.72.40:{port}/api/resources{path_csv}", headers=headers, timeout=10)
                file_json = res4.json()

                if file_json is None:
                    add_error("106网站", name, "内容接口返回 Null", date_val)
                    continue

                raw_content = file_json.get('content', '')
                if not raw_content:
                    add_error("106网站", name, "content字段为空", date_val)

                save_json(FRPS_DIR, name, {
                    "status": status,
                    "latest_path": path_csv,
                    "content": process_text_content(raw_content)
                })

            except Exception as e:
                # Per-station boundary: one broken proxy must not stop the scan.
                add_error("106网站", name, f"站点级崩溃: {str(e)}")

    except Exception as e:
        add_error("106网站", "全局逻辑", f"主进程崩溃: {str(e)}")


# --- Host 82 logic ---

def run_82_logic():
    """Log into the host-82 weather backend, iterate every station in the
    station list, and record stations whose latest data is not from today."""
    print("\n>>> 开始处理 82 网站 (Weather)...")
    c = CONFIG["82"]
    session = requests.Session()
    today_fmt = datetime.now().strftime("%Y-%m-%d")

    try:
        session.post(f"{c['base_url']}/login.php", data=c["login"], timeout=10)
        resp = session.post(f"{c['base_url']}/GetStationList.php", timeout=10)
        if resp.status_code != 200:
            add_error("82网站", "登录模块", f"无法获取列表: HTTP {resp.status_code}")
            return

        stations = etree.HTML(resp.content).xpath('//option/@value')
        stations = [s for s in stations if s and str(s).strip()]

        for sid in stations:
            try:
                # Backend expects the raw station id as a text/plain body.
                r = session.post(f"{c['base_url']}/getLastWeatherData.php", data=str(sid),
                                 headers={'Content-Type': 'text/plain'}, timeout=10)
                if r.status_code != 200:
                    add_error("82网站", sid, f"请求失败: HTTP {r.status_code}")
                    continue

                data = r.json()
                if data is None:
                    add_error("82网站", sid, "返回 Null 数据")
                    continue

                d_list = data.get('date', [])
                if not d_list:
                    add_error("82网站", sid, "返回结果中无日期列表")
                else:
                    latest = str(d_list[-1])
                    if latest.split()[0] != today_fmt:
                        add_error("82网站", sid, "数据非当天更新", latest)

                save_json(WEATHER_DIR, sid, data)
                time.sleep(0.1)  # be gentle with the backend
            except Exception as e:
                add_error("82网站", sid, f"数据解析异常: {str(e)}")
    except Exception as e:
        add_error("82网站", "初始化模块", str(e))
+# --- 汇总导出 --- + +def export_to_excel(): + if not error_logs: + print("\n[✔] 未发现错误记录。") + return + + df = pd.DataFrame(error_logs) + cols = ["数据来源", "站点/代理名称", "错误原因", "日期偏移量", "最新数据时间", "检查时间"] + # 过滤掉 dataframe 中不存在的列,防止报错 + df = df[[c for c in cols if c in df.columns]] + df.to_excel(EXCEL_PATH, index=False) + print(f"\n[!] 错误报表已生成至: {EXCEL_PATH} (共 {len(error_logs)} 条)") + + +if __name__ == "__main__": + run_106_logic() + run_82_logic() + export_to_excel() + print("\n任务全部完成。") \ No newline at end of file