修改数据获取,确保json文件完整获取
This commit is contained in:
@ -1,135 +1,37 @@
|
||||
# services/core.py
|
||||
import logging
|
||||
from datetime import datetime
|
||||
import threading
|
||||
from extensions import db
|
||||
# 引入新的模型
|
||||
from models import Device, DeviceHistory
|
||||
# 引入爬虫逻辑 (保持相对导入不变)
|
||||
from .crawler_106 import run_106_logic
|
||||
from .crawler_82 import run_82_logic
|
||||
|
||||
task_lock = threading.Lock()
|
||||
|
||||
|
||||
def calculate_offset(latest_time_str):
    """Describe how many days a device's latest data date lags behind today.

    Args:
        latest_time_str: Timestamp text whose first whitespace-separated
            token is a date written as ``YYYY-MM-DD`` or ``YYYY_MM_DD``.
            Any falsy value, or the literal ``"N/A"``, means no timestamp
            is available.

    Returns:
        str: One of
            * ``"从未同步"`` when no timestamp is available,
            * ``"当天已同步"`` when the date is today,
            * ``"滞后 N 天"`` where N is ``today - date`` in days (N is
              negative for a date in the future),
            * ``"时间解析失败"`` when the text cannot be parsed as a date.
    """
    if not latest_time_str or latest_time_str == "N/A":
        return "从未同步"

    try:
        # Keep only the date part and normalise '_' separators to '-'.
        date_token = str(latest_time_str).split()[0].replace('_', '-')
        target_date = datetime.strptime(date_token, "%Y-%m-%d").date()
    except (IndexError, ValueError):
        # IndexError: whitespace-only input leaves nothing after split().
        # ValueError: the token does not match the expected date format.
        return "时间解析失败"

    lag_days = (datetime.now().date() - target_date).days
    if lag_days == 0:
        return "当天已同步"
    return f"滞后 {lag_days} 天"
|
||||
|
||||
|
||||
def save_record_to_db(source, name, status, reason, latest_time="N/A", content=None):
    """Refresh a device's snapshot row and record history only for new data.

    The ``Device`` row looked up by ``name`` is created when it does not
    exist. Its check time, status, reason and offset are rewritten on every
    call. A ``DeviceHistory`` row is added only when a real ``latest_time``
    (anything other than ``"N/A"``) is reported and the device is either new
    or previously stored a different ``latest_time``.

    Args:
        source: Stored on newly created devices and used in the returned key.
        name: Device name used for the lookup.
        status: Status text written to the device and to any history row.
        reason: Reason text written to the device.
        latest_time: Latest data timestamp reported for the device, or
            ``"N/A"`` when none was obtained.
        content: Accepted but not used by this function.

    Returns:
        The key ``f"{source}_{name}"`` on success, or ``None`` when any
        exception occurs (the session is rolled back and the error logged).
    """
    try:
        device = Device.query.filter_by(name=name).first()
        checked_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        offset_text = calculate_offset(latest_time)
        has_timestamp = latest_time != "N/A"

        if device:
            # Known device: a changed timestamp means new data has arrived.
            record_history = has_timestamp and device.latest_time != latest_time

            # Nothing scraped this round but a stored timestamp exists:
            # recompute the lag from the stored value so it keeps growing.
            if not has_timestamp and device.latest_time:
                offset_text = calculate_offset(device.latest_time)
        else:
            # First sighting of this device: create it with default flags and
            # request a history row for it.
            device = Device(
                name=name,
                source=source,
                install_site="",
                is_maintaining=False,
                is_hidden=False
            )
            db.session.add(device)
            record_history = True
            logging.info(f"发现新设备: {name}")

        # Snapshot fields are refreshed on every check, new data or not.
        device.check_time = checked_at
        device.status = status
        device.reason = reason
        device.offset = offset_text
        # The displayed timestamp only moves when a real one was scraped.
        if has_timestamp:
            device.latest_time = latest_time

        if record_history and has_timestamp:
            # Flush before reading device.id for the history row.
            db.session.flush()
            db.session.add(
                DeviceHistory(
                    device_id=device.id,
                    data_time=latest_time,
                    status=status
                )
            )
            logging.info(f"[{name}] 数据更新: {latest_time} -> 存入历史")

        db.session.commit()
        return f"{source}_{name}"

    except Exception as exc:
        db.session.rollback()
        logging.error(f"DB Error [{name}]: {exc}")
        return None
|
||||
|
||||
|
||||
def execute_monitor_task():
    """Run both crawlers once and return their combined results.

    Only one run may be in progress at a time. If ``task_lock`` is already
    held, a warning is logged and the call returns ``None`` without running
    any crawler.

    Returns:
        ``None`` when the run is skipped, otherwise a dict with:
            * ``'device_list'``: the 106 results followed by the 82 results,
            * ``'target_time'``: always ``None`` (each item carries its own
              time),
            * ``'temp_file_path'``: always ``None`` (legacy field; each item
              carries its own file path).
    """
    # A non-blocking acquire tests and takes the lock in one step. Checking
    # locked() first and acquiring afterwards lets two callers both pass the
    # check, so the second would block and re-run instead of skipping.
    if not task_lock.acquire(blocking=False):
        logging.warning(">>> 任务正在运行中,跳过")
        return None

    try:
        logging.info(">>> 开始执行监控任务...")

        # 1. Fetch the 106 data list.
        list_106 = run_106_logic()

        # 2. Fetch the 82 data list.
        list_82 = run_82_logic()

        # 3. Merge. Assumes both crawlers return lists — TODO confirm.
        combined_list = list_106 + list_82

        logging.info(f">>> 任务完成,共获取 {len(combined_list)} 条数据")

        return {
            'device_list': combined_list,
            'target_time': None,  # per-item times live on each item
            'temp_file_path': None  # legacy field; paths live on each item
        }
    finally:
        task_lock.release()
|
||||
Reference in New Issue
Block a user