Files
ZDXX/2_1banben/services/core.py

95 lines
3.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# services/core.py
import logging
import threading
import traceback
from datetime import datetime
# Guarded import: a missing or broken crawler module must not stop the whole
# program from starting, so fall back to a stub that produces no data.
try:
    from .crawler_106 import run_106_logic
except ImportError as exc:
    # ImportError is also raised when crawler_106 exists but one of its own
    # imports fails, so record the real cause in the log rather than only
    # announcing that the module was not found.
    logging.warning("crawler_106 import failed, using empty fallback: %s", exc)
    print("⚠️ 警告: 未找到 crawler_106 模块")

    def run_106_logic():
        """Fallback used when crawler_106 cannot be imported.

        Returns:
            A new empty list on every call.
        """
        return []
# Guarded import: if the 82 crawler cannot be imported, keep the program
# running with a stub that produces no data.
try:
    from .crawler_82 import run_82_logic
except ImportError as exc:
    # ImportError is also raised when crawler_82 exists but one of its own
    # imports fails, so record the real cause in the log rather than only
    # announcing that the module was not found.
    logging.warning("crawler_82 import failed, using empty fallback: %s", exc)
    print("⚠️ 警告: 未找到 crawler_82 模块")

    def run_82_logic():
        """Fallback used when crawler_82 cannot be imported.

        Returns:
            A new empty list on every call.
        """
        return []
# Module-level lock that execute_monitor_task uses to keep two runs of the
# monitoring task from overlapping.
task_lock = threading.Lock()
def execute_monitor_task():
    """Run every crawler once and return their combined results.

    Returns:
        None if another run currently holds ``task_lock`` (this call is
        skipped). Otherwise a dict of the form
        ``{'device_list': [item, ...], 'target_time': None,
        'temp_file_path': None}``, where ``device_list`` contains the items
        returned by the 106 crawler followed by those of the 82 crawler.
    """
    # 1. Single-run guard. The lock is claimed with one atomic non-blocking
    # acquire: testing locked() and then entering `with task_lock` would let a
    # second caller slip past the test, block on the lock, and run the task
    # again afterwards instead of skipping.
    if not task_lock.acquire(blocking=False):
        logging.warning(">>> 任务正在运行中,跳过")
        print(">>> ⚠️ 任务正在运行中,本次请求跳过")
        return None

    try:
        logging.info(">>> 开始执行监控任务...")
        print(f"--- [任务开始] {datetime.now().strftime('%H:%M:%S')} ---")
        all_results = []

        # ==========================
        # 2. Run the 106 crawler
        # ==========================
        try:
            list_106 = run_106_logic()
            if list_106:
                print(f"✅ 106爬虫获取数据: {len(list_106)}")
                # Debug aid: show the first record to confirm that
                # 'num_files' is present.
                first = list_106[0]
                print(f" [调试检查] 106样本: {first.get('name')} | num_files={first.get('num_files')}")
                all_results.extend(list_106)
            else:
                print("⚠️ 106爬虫未返回数据")
        except Exception as e:
            # Best effort: a failure here must not prevent the 82 crawler
            # from running.
            print(f"❌ 106爬虫执行失败: {e}")
            traceback.print_exc()

        # ==========================
        # 3. Run the 82 crawler
        # ==========================
        try:
            list_82 = run_82_logic()
            if list_82:
                print(f"✅ 82爬虫获取数据: {len(list_82)}")
                # Per the original note, 82 records have no file count;
                # default 'num_files' to 0 so that storing them later does
                # not fail.
                for item in list_82:
                    item.setdefault('num_files', 0)
                all_results.extend(list_82)
        except Exception as e:
            # Best effort: keep whatever the 106 crawler already produced.
            print(f"❌ 82爬虫执行失败: {e}")
            traceback.print_exc()

        # ==========================
        # 4. Summarize and return
        # ==========================
        logging.info(">>> 任务完成,共获取 %d 条数据", len(all_results))
        print(f"--- [任务结束] 总计获取: {len(all_results)} 台设备 ---")
        return {
            'device_list': all_results,
            'target_time': None,  # the actual time is carried in item['target_time']
            'temp_file_path': None,  # legacy field; the path is carried in item['temp_file']
        }
    finally:
        # Always release, even if logging/printing above raises.
        task_lock.release()