打包上传的2.0版本

2026-01-09 12:48:50 +08:00
parent ca895af384
commit ffbd494b7b
14 changed files with 240 additions and 162 deletions
--- a/2_1banben/services/init.py
+++ b/2_1banben/services/init.py
@ -0,0 +1,2 @@
+# services/__init__.py
+# 这是一个空文件，用于将 services 文件夹标识为 Python 包。
--- a/2_1banben/services/core.py
+++ b/2_1banben/services/core.py
@ -0,0 +1,37 @@
+# services/core.py
+import logging
+import threading
+from .crawler_106 import run_106_logic
+from .crawler_82 import run_82_logic
+
+task_lock = threading.Lock()
+
+
+def execute_monitor_task():
+    """
+    执行所有爬虫，返回一个大列表：
+    {'device_list': [item1, item2...], 'target_time': '...'}
+    """
+    if task_lock.locked():
+        logging.warning(">>> 任务正在运行中，跳过")
+        return None
+
+    with task_lock:
+        logging.info(">>> 开始执行监控任务...")
+
+        # 1. 获取 106 数据列表
+        list_106 = run_106_logic()
+
+        # 2. 获取 82 数据列表
+        list_82 = run_82_logic()
+
+        # 3. 合并
+        combined_list = list_106 + list_82
+
+        logging.info(f">>> 任务完成，共获取 {len(combined_list)} 条数据")
+
+        return {
+            'device_list': combined_list,
+            'target_time': None,  # 具体时间已在 item 里
+            'temp_file_path': None  # 废弃旧逻辑，文件路径已在 item 里
+        }
--- a/2_1banben/services/crawler_106.py
+++ b/2_1banben/services/crawler_106.py
@ -0,0 +1,159 @@
+# services/crawler_106.py
+import os
+import requests
+import logging
+from datetime import datetime
+from config import Config
+
+CONFIG = Config.CRAWLER_CONFIG["106"]
+
+
+def get_temp_dir():
+    base_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
+    temp_dir = os.path.join(base_dir, 'instance', 'temp')
+    if not os.path.exists(temp_dir):
+        os.makedirs(temp_dir)
+    return temp_dir
+
+
+def get_106_dynamic_token(port):
+    try:
+        login_url = f"http://106.75.72.40:{port}/api/login"
+        resp = requests.post(login_url, json=CONFIG["login_payload"], timeout=10)
+        return resp.text.strip().replace('"', '') if resp.status_code == 200 else None
+    except:
+        return None
+
+
+def find_closest_item(items, is_date_level=True):
+    if not items or not isinstance(items, list): return None
+    today = datetime.now()
+    scored_items = []
+    for item in items:
+        name_val = item.get('name', '')
+        path_val = item.get('path', '')
+        target_str = name_val if name_val else path_val.split('/')[-1]
+        try:
+            if is_date_level:
+                current_date = datetime.strptime(target_str, "%Y_%m_%d")
+            else:
+                mod_str = item.get('modified', '')
+                current_date = datetime.fromisoformat(mod_str.replace('Z', '+00:00'))
+            diff = abs((today - current_date.replace(tzinfo=None)).total_seconds())
+            scored_items.append((diff, item, target_str))
+        except:
+            continue
+    if not scored_items: return None
+    scored_items.sort(key=lambda x: x[0])
+    return scored_items[0]
+
+
+def run_106_logic():
+    """返回 result_list, 每个元素是一个字典"""
+    results = []
+    print(">>> [106爬虫] 启动...")
+    today_str = datetime.now().strftime("%Y_%m_%d")
+    main_headers = {"Authorization": CONFIG["primary_auth"], "User-Agent": "Mozilla/5.0"}
+
+    try:
+        resp = requests.get(CONFIG["base_url"], headers=main_headers, timeout=20)
+        proxies = resp.json().get('proxies', [])
+
+        for item in proxies:
+            name = item.get('name', '')
+            if not name.lower().endswith('_data'): continue
+            name_upper = name.upper()
+            is_tower_underscore = "TOWER_" in name_upper
+            is_tower_i = "TOWER" in name_upper and not is_tower_underscore
+            if not (is_tower_underscore or is_tower_i): continue
+
+            # 构建基础数据包
+            data_packet = {
+                'source': '106网站',
+                'name': name,
+                'status': '正常',
+                'value': '',
+                'target_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                'raw_json': {},
+                'temp_file': None
+            }
+
+            if str(item.get('status')).lower() != 'online':
+                data_packet['status'] = '离线'
+                data_packet['value'] = f"状态: {item.get('status')}"
+                results.append(data_packet)
+                continue
+
+            try:
+                port = item.get('conf', {}).get('remote_port')
+                token = get_106_dynamic_token(port)
+                if not token:
+                    data_packet['status'] = '异常'
+                    data_packet['value'] = "Token获取失败"
+                    results.append(data_packet)
+                    continue
+
+                headers = {"Authorization": CONFIG["primary_auth"], "x-auth": token}
+                api_root = "/api/resources/Data/" if is_tower_underscore else "/api/resources/data/"
+
+                res1 = requests.get(f"http://106.75.72.40:{port}{api_root}", headers=headers, timeout=10)
+                best_date = find_closest_item(res1.json().get('items', []), True)
+
+                if not best_date or best_date[2] != today_str:
+                    data_packet['value'] = "未找到今日文件夹"
+                    data_packet['target_time'] = best_date[2] if best_date else "N/A"
+                    results.append(data_packet)
+                    continue
+
+                data_packet['target_time'] = best_date[2]  # 实际数据时间
+                date_path = f"{api_root}{best_date[2]}/"
+                res2 = requests.get(f"http://106.75.72.40:{port}{date_path}", headers=headers, timeout=10)
+                best_file = find_closest_item(res2.json().get('items', []), False)
+
+                if not best_file:
+                    data_packet['value'] = "今日文件夹为空"
+                    results.append(data_packet)
+                    continue
+
+                file_item = best_file[1]
+                full_path = file_item.get('path') or f"{date_path}{file_item.get('name')}"
+
+                # 核心逻辑：获取内容
+                if is_tower_i:
+                    # 下载二进制文件
+                    download_url = f"http://106.75.72.40:{port}/api/raw{full_path}"
+                    res3 = requests.get(download_url, headers=headers, timeout=20, stream=True)
+                    if res3.status_code == 200:
+                        safe_name = f"{name}_{datetime.now().strftime('%H%M%S')}.db"
+                        temp_path = os.path.join(get_temp_dir(), safe_name)
+                        with open(temp_path, 'wb') as f:
+                            f.write(res3.content)
+
+                        data_packet['temp_file'] = temp_path  # 🔥 传递给API
+                        data_packet['value'] = f"Binary Downloaded: {len(res3.content)} bytes"
+                        data_packet['raw_json'] = file_item  # 用文件属性充当RawData
+                    else:
+                        data_packet['status'] = '异常'
+                        data_packet['value'] = f"下载失败: {res3.status_code}"
+                else:
+                    # JSON 内容
+                    file_api_url = f"http://106.75.72.40:{port}/api/resources{full_path}"
+                    res3 = requests.get(file_api_url, headers=headers, timeout=20)
+                    try:
+                        json_content = res3.json()
+                        data_packet['raw_json'] = json_content  # 🔥 完整保存
+                        data_packet['value'] = json_content.get('content', '')
+                    except:
+                        data_packet['value'] = "JSON解析失败"
+
+                results.append(data_packet)
+
+            except Exception as e:
+                data_packet['status'] = '异常'
+                data_packet['value'] = str(e)[:50]
+                results.append(data_packet)
+
+    except Exception as e:
+        logging.error(f"106 Crawler Error: {e}")
+
+    return results
--- a/2_1banben/services/crawler_82.py
+++ b/2_1banben/services/crawler_82.py
@ -0,0 +1,62 @@
+# services/crawler_82.py
+import requests
+import json
+import logging
+from lxml import etree
+from config import Config
+from datetime import datetime
+
+CONFIG = Config.CRAWLER_CONFIG["82"]
+
+
+def run_82_logic():
+    """返回 result_list"""
+    results = []
+    print(">>> [82爬虫] 启动...")
+    session = requests.Session()
+
+    try:
+        session.post(f"{CONFIG['base_url']}/login.php", data=CONFIG["login"], timeout=10)
+        resp = session.post(f"{CONFIG['base_url']}/GetStationList.php", timeout=10)
+        html = etree.HTML(resp.content)
+        if html is None: return []
+
+        stations = html.xpath('//option/@value')
+        for sid in [s for s in stations if s]:
+            data_packet = {
+                'source': '82网站',
+                'name': str(sid),
+                'status': '正常',
+                'value': '',
+                'target_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                'raw_json': {},
+                'temp_file': None
+            }
+            try:
+                r = session.post(f"{CONFIG['base_url']}/getLastWeatherData.php", data=str(sid),
+                                 headers={'Content-Type': 'text/plain'}, timeout=10)
+                try:
+                    data = r.json()
+                except:
+                    data = None
+
+                if data:
+                    d_list = data.get('date', [])
+                    latest = str(d_list[-1]) if d_list else "N/A"
+                    data_packet['target_time'] = latest
+                    data_packet['value'] = f"Data Points: {len(d_list)}"
+                    data_packet['raw_json'] = data  # 🔥 存完整JSON
+                else:
+                    data_packet['status'] = '异常'
+                    data_packet['value'] = "返回空数据"
+
+            except Exception as e:
+                data_packet['status'] = '异常'
+                data_packet['value'] = "单个采集失败"
+
+            results.append(data_packet)
+
+    except Exception as e:
+        logging.error(f"82 Crawler Error: {e}")
+
+    return results