# services/core.py
import logging
import threading
import traceback
from datetime import datetime

# ==============================================================================
# 1. Dynamic crawler imports
# ==============================================================================
# Each crawler is optional: if its module cannot be imported, a stub that
# returns an empty list is installed so the monitor task can still run.
try:
    from .crawler_106 import run_106_logic
except ImportError as e:
    print(f"⚠️ [系统警告] 无法导入 crawler_106: {e}")

    def run_106_logic():
        return []

try:
    from .crawler_82 import run_82_logic
except ImportError as e:
    print(f"⚠️ [系统警告] 无法导入 crawler_82: {e}")

    def run_82_logic():
        return []

# Global task lock: at most one monitor task may run at a time.
task_lock = threading.Lock()


def execute_monitor_task():
    """Run every crawler once and return the merged device list.

    The task is guarded by the module-level ``task_lock``. If another call
    already holds the lock, this call is skipped instead of waiting.

    A failure inside one crawler is printed together with its traceback and
    does not prevent the other crawler from running.

    Returns:
        ``None`` when the task is skipped because one is already running.
        Otherwise a dict with the keys:

        * ``'device_list'``: concatenated items from the 106 crawler followed
          by the 82 crawler (82 items get ``'num_files'`` / ``'status'``
          defaults when those keys are missing).
        * ``'target_time'``: always ``None``.
        * ``'temp_file_path'``: always ``None``.
    """
    # 1. Lock: prevent overlapping runs. Acquire non-blockingly in a single
    #    atomic step; a separate ``locked()`` check followed by ``with lock``
    #    lets a second caller slip past the check and then block, producing a
    #    duplicate run as soon as the first one finishes.
    if not task_lock.acquire(blocking=False):
        logging.warning(">>> 任务正在运行中,跳过")
        print(">>> ⚠️ [调度] 任务正在运行中,本次请求跳过")
        return None

    try:
        start_time = datetime.now()
        logging.info(">>> 开始执行监控任务...")
        print(f"--- [任务开始] {start_time.strftime('%H:%M:%S')} ---")

        all_results = []

        # ==========================
        # 2. Run the 106 crawler
        # ==========================
        try:
            print(">>> [106爬虫] 启动...")
            list_106 = run_106_logic()

            if list_106:
                count = len(list_106)
                print(f"✅ 106爬虫获取数据: {count} 条")
                all_results.extend(list_106)
            else:
                print("⚠️ 106爬虫运行完成,但未返回任何数据 (空列表)")
        except Exception as e:
            # Boundary handler: report and continue with the next crawler.
            print(f"❌ 106爬虫执行严重失败: {e}")
            traceback.print_exc()

        # ==========================
        # 3. Run the 82 crawler
        # ==========================
        try:
            print(">>> [82爬虫] 启动...")
            list_82 = run_82_logic()

            if list_82:
                print(f"✅ 82爬虫获取数据: {len(list_82)} 条")
                # The 82 crawler has no notion of a file count; fill in
                # defaults so downstream storage does not fail on missing
                # keys. Existing values are left untouched.
                for item in list_82:
                    item.setdefault('num_files', 0)
                    item.setdefault('status', 'Unknown')
                all_results.extend(list_82)
            else:
                print("⚠️ 82爬虫运行完成,但未返回数据")
        except Exception as e:
            # Boundary handler: report and fall through to the summary.
            print(f"❌ 82爬虫执行严重失败: {e}")
            traceback.print_exc()

        # ==========================
        # 4. Summarize and return
        # ==========================
        duration = (datetime.now() - start_time).total_seconds()
        logging.info(">>> 任务完成,共获取 %s 条数据", len(all_results))
        print(f"--- [任务结束] 总耗时: {duration:.2f}秒 | 总计获取: {len(all_results)} 台设备 ---")

        return {
            'device_list': all_results,
            'target_time': None,  # the concrete time lives in item['target_time']
            'temp_file_path': None,  # legacy field, old logic is deprecated
        }
    finally:
        # Always release, even if something outside the crawler guards raises.
        task_lock.release()