修改新增加文件数量的查询功能
This commit is contained in:
@ -1,8 +1,27 @@
|
||||
# services/core.py
|
||||
import logging
|
||||
import threading
|
||||
from .crawler_106 import run_106_logic
|
||||
from .crawler_82 import run_82_logic
|
||||
import traceback
|
||||
from datetime import datetime
|
||||
|
||||
# Import the crawler defensively so that a missing or broken crawler_106
# module does not stop the whole program from starting.
try:
    from .crawler_106 import run_106_logic
except ImportError as import_error:
    print("⚠️ 警告: 未找到 crawler_106 模块")
    # ImportError is also raised when crawler_106 exists but one of its own
    # imports fails, so record the actual cause instead of discarding it.
    logging.warning("crawler_106 import failed: %s", import_error)

    def run_106_logic():
        """Fallback used when crawler_106 cannot be imported.

        Returns:
            An empty list, so callers that iterate or extend with the
            result simply see no data from this crawler.
        """
        return []
|
||||
|
||||
# Import the crawler defensively so that a missing or broken crawler_82
# module does not stop the whole program from starting.
try:
    from .crawler_82 import run_82_logic
except ImportError as import_error:
    print("⚠️ 警告: 未找到 crawler_82 模块")
    # ImportError is also raised when crawler_82 exists but one of its own
    # imports fails, so record the actual cause instead of discarding it.
    logging.warning("crawler_82 import failed: %s", import_error)

    def run_82_logic():
        """Fallback used when crawler_82 cannot be imported.

        Returns:
            An empty list, so callers that iterate or extend with the
            result simply see no data from this crawler.
        """
        return []
|
||||
|
||||
# Module-level lock for the monitor task: the task body runs under
# `with task_lock`, and a request that finds `task_lock.locked()` is skipped.
task_lock = threading.Lock()
|
||||
|
||||
@ -12,26 +31,65 @@ def execute_monitor_task():
|
||||
执行所有爬虫,返回一个大列表:
|
||||
{'device_list': [item1, item2...], 'target_time': '...'}
|
||||
"""
|
||||
# 1. 锁机制:防止任务重复运行
|
||||
if task_lock.locked():
|
||||
logging.warning(">>> 任务正在运行中,跳过")
|
||||
print(">>> ⚠️ 任务正在运行中,本次请求跳过")
|
||||
return None
|
||||
|
||||
with task_lock:
|
||||
logging.info(">>> 开始执行监控任务...")
|
||||
print(f"--- [任务开始] {datetime.now().strftime('%H:%M:%S')} ---")
|
||||
|
||||
# 1. 获取 106 数据列表
|
||||
list_106 = run_106_logic()
|
||||
all_results = []
|
||||
|
||||
# 2. 获取 82 数据列表
|
||||
list_82 = run_82_logic()
|
||||
# ==========================
|
||||
# 2. 执行 106 爬虫
|
||||
# ==========================
|
||||
try:
|
||||
list_106 = run_106_logic()
|
||||
if list_106:
|
||||
count = len(list_106)
|
||||
print(f"✅ 106爬虫获取数据: {count} 条")
|
||||
|
||||
# 3. 合并
|
||||
combined_list = list_106 + list_82
|
||||
# 🔍 [调试] 打印第一条数据,确认 num_files 是否存在
|
||||
if count > 0:
|
||||
first = list_106[0]
|
||||
print(f" [调试检查] 106样本: {first.get('name')} | num_files={first.get('num_files')}")
|
||||
|
||||
logging.info(f">>> 任务完成,共获取 {len(combined_list)} 条数据")
|
||||
all_results.extend(list_106)
|
||||
else:
|
||||
print("⚠️ 106爬虫未返回数据")
|
||||
except Exception as e:
|
||||
print(f"❌ 106爬虫执行失败: {e}")
|
||||
traceback.print_exc()
|
||||
|
||||
# ==========================
|
||||
# 3. 执行 82 爬虫
|
||||
# ==========================
|
||||
try:
|
||||
list_82 = run_82_logic()
|
||||
if list_82:
|
||||
print(f"✅ 82爬虫获取数据: {len(list_82)} 条")
|
||||
|
||||
# 🛠️ [补全] 82爬虫没有文件数概念,手动补0,防止入库报错
|
||||
for item in list_82:
|
||||
if 'num_files' not in item:
|
||||
item['num_files'] = 0
|
||||
|
||||
all_results.extend(list_82)
|
||||
except Exception as e:
|
||||
print(f"❌ 82爬虫执行失败: {e}")
|
||||
traceback.print_exc()
|
||||
|
||||
# ==========================
|
||||
# 4. 汇总返回
|
||||
# ==========================
|
||||
logging.info(f">>> 任务完成,共获取 {len(all_results)} 条数据")
|
||||
print(f"--- [任务结束] 总计获取: {len(all_results)} 台设备 ---")
|
||||
|
||||
return {
|
||||
'device_list': combined_list,
|
||||
'target_time': None, # 具体时间已在 item 里
|
||||
'temp_file_path': None # 废弃旧逻辑,文件路径已在 item 里
|
||||
'device_list': all_results,
|
||||
'target_time': None, # 具体时间已在 item['target_time'] 里
|
||||
'temp_file_path': None # 废弃旧逻辑,文件路径已在 item['temp_file'] 里
|
||||
}
|
||||
@ -52,7 +52,7 @@ def run_106_logic():
|
||||
"""返回 result_list, 每个元素是一个字典"""
|
||||
results = []
|
||||
print(">>> [106爬虫] 启动...")
|
||||
today_str = datetime.now().strftime("%Y_%m_%d")
|
||||
# today_str = datetime.now().strftime("%Y_%m_%d") # ❌ 移除严格的“今天”判断
|
||||
main_headers = {"Authorization": CONFIG["primary_auth"], "User-Agent": "Mozilla/5.0"}
|
||||
|
||||
try:
|
||||
@ -75,7 +75,8 @@ def run_106_logic():
|
||||
'value': '',
|
||||
'target_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
'raw_json': {},
|
||||
'temp_file': None
|
||||
'temp_file': None,
|
||||
'num_files': 0 # ✅ 默认值
|
||||
}
|
||||
|
||||
if str(item.get('status')).lower() != 'online':
|
||||
@ -96,29 +97,40 @@ def run_106_logic():
|
||||
headers = {"Authorization": CONFIG["primary_auth"], "x-auth": token}
|
||||
api_root = "/api/resources/Data/" if is_tower_underscore else "/api/resources/data/"
|
||||
|
||||
# 1. 获取日期列表
|
||||
res1 = requests.get(f"http://106.75.72.40:{port}{api_root}", headers=headers, timeout=10)
|
||||
best_date = find_closest_item(res1.json().get('items', []), True)
|
||||
|
||||
if not best_date or best_date[2] != today_str:
|
||||
data_packet['value'] = "未找到今日文件夹"
|
||||
data_packet['target_time'] = best_date[2] if best_date else "N/A"
|
||||
# ✅ 修改点:如果找不到任何日期文件夹,才报错。否则,即使是旧日期也继续往下走。
|
||||
if not best_date:
|
||||
data_packet['value'] = "未找到任何日期文件夹"
|
||||
results.append(data_packet)
|
||||
continue
|
||||
|
||||
data_packet['target_time'] = best_date[2] # 实际数据时间
|
||||
data_packet['target_time'] = best_date[2] # 记录找到的那个日期 (比如 2026_02_02)
|
||||
date_path = f"{api_root}{best_date[2]}/"
|
||||
|
||||
# 2. 请求具体日期的文件夹内容 (这一步能获取 numFiles)
|
||||
res2 = requests.get(f"http://106.75.72.40:{port}{date_path}", headers=headers, timeout=10)
|
||||
best_file = find_closest_item(res2.json().get('items', []), False)
|
||||
folder_data = res2.json() # 获取完整JSON
|
||||
|
||||
# ✅ 核心:提取 numFiles (只要请求成功,这里一定能拿到)
|
||||
file_count = folder_data.get('numFiles', 0)
|
||||
data_packet['num_files'] = file_count
|
||||
print(f" -> {name}: 找到日期 {best_date[2]}, 文件数: {file_count}")
|
||||
|
||||
# 3. 找该文件夹里最新的文件
|
||||
best_file = find_closest_item(folder_data.get('items', []), False)
|
||||
|
||||
if not best_file:
|
||||
data_packet['value'] = "今日文件夹为空"
|
||||
data_packet['value'] = "文件夹为空" # 这种情况下 numFiles 应该是 0
|
||||
results.append(data_packet)
|
||||
continue
|
||||
|
||||
file_item = best_file[1]
|
||||
full_path = file_item.get('path') or f"{date_path}{file_item.get('name')}"
|
||||
|
||||
# 核心逻辑:获取内容
|
||||
# 4. 下载/读取内容逻辑
|
||||
if is_tower_i:
|
||||
# 下载二进制文件
|
||||
download_url = f"http://106.75.72.40:{port}/api/raw{full_path}"
|
||||
@ -129,9 +141,9 @@ def run_106_logic():
|
||||
with open(temp_path, 'wb') as f:
|
||||
f.write(res3.content)
|
||||
|
||||
data_packet['temp_file'] = temp_path # 🔥 传递给API
|
||||
data_packet['temp_file'] = temp_path
|
||||
data_packet['value'] = f"Binary Downloaded: {len(res3.content)} bytes"
|
||||
data_packet['raw_json'] = file_item # 用文件属性充当RawData
|
||||
data_packet['raw_json'] = file_item # 借用 file_item 充当 raw_json
|
||||
else:
|
||||
data_packet['status'] = '异常'
|
||||
data_packet['value'] = f"下载失败: {res3.status_code}"
|
||||
@ -141,7 +153,7 @@ def run_106_logic():
|
||||
res3 = requests.get(file_api_url, headers=headers, timeout=20)
|
||||
try:
|
||||
json_content = res3.json()
|
||||
data_packet['raw_json'] = json_content # 🔥 完整保存
|
||||
data_packet['raw_json'] = json_content
|
||||
data_packet['value'] = json_content.get('content', '')
|
||||
except:
|
||||
data_packet['value'] = "JSON解析失败"
|
||||
|
||||
Reference in New Issue
Block a user