修改自动爬取时间为17点,修复自动爬取未写入的问题
This commit is contained in:
@ -16,10 +16,11 @@ from flask_apscheduler import APScheduler
|
|||||||
try:
|
try:
|
||||||
from config import Config
|
from config import Config
|
||||||
from extensions import db
|
from extensions import db
|
||||||
|
# 引入两个模型:Device(存最新状态), DeviceHistory(存所有历史日志)
|
||||||
from models import Device, DeviceHistory
|
from models import Device, DeviceHistory
|
||||||
from services.core import execute_monitor_task
|
from services.core import execute_monitor_task
|
||||||
|
|
||||||
# from services.iot_api import sync_iot_data_service # 如果不需要IoT可以注释
|
# from services.iot_api import sync_iot_data_service # 如不需要可注释
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from routes.api import api_bp as device_bp
|
from routes.api import api_bp as device_bp
|
||||||
@ -55,11 +56,14 @@ mimetypes.add_type('text/css', '.css')
|
|||||||
|
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# 3. 核心定时任务逻辑 (加强版)
|
# 3. 核心定时任务逻辑 (已修正:历史记录改为“永久新增”模式)
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
def auto_monitor_job(app):
|
def auto_monitor_job(app):
|
||||||
"""
|
"""
|
||||||
每天 17:00 触发的爬虫任务
|
每天 17:00 触发的爬虫任务。
|
||||||
|
逻辑说明:
|
||||||
|
1. Device 表:始终更新为最新状态(覆盖旧值),保证首页看到的是最新的。
|
||||||
|
2. DeviceHistory 表:始终新增一条记录(追加模式),保留每一次爬取的历史痕迹。
|
||||||
"""
|
"""
|
||||||
# ✅ 强制使用应用上下文,确保数据库连接有效
|
# ✅ 强制使用应用上下文,确保数据库连接有效
|
||||||
with app.app_context():
|
with app.app_context():
|
||||||
@ -87,34 +91,40 @@ def auto_monitor_job(app):
|
|||||||
print(f"📦 [数据获取] 爬虫返回了 {len(scraped_list)} 条设备数据")
|
print(f"📦 [数据获取] 爬虫返回了 {len(scraped_list)} 条设备数据")
|
||||||
|
|
||||||
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
success_count = 0
|
|
||||||
|
|
||||||
# 2. 遍历入库
|
# 统计计数
|
||||||
|
stats = {'new_device': 0, 'history_added': 0}
|
||||||
|
|
||||||
|
# 2. 遍历每一条爬取到的数据
|
||||||
for item in scraped_list:
|
for item in scraped_list:
|
||||||
d_name = item.get('name')
|
d_name = item.get('name')
|
||||||
if not d_name: continue
|
if not d_name: continue
|
||||||
|
|
||||||
# 查找或新建设备
|
# 获取关键数据字段
|
||||||
|
f_count = item.get('num_files', 0)
|
||||||
|
target_date = item.get('target_time') # 例如 "2026-02-04"
|
||||||
|
|
||||||
|
# =========================================================
|
||||||
|
# ✅ A. 处理 Device 表 (始终更新最新状态)
|
||||||
|
# =========================================================
|
||||||
device = Device.query.filter_by(name=d_name).first()
|
device = Device.query.filter_by(name=d_name).first()
|
||||||
if not device:
|
if not device:
|
||||||
print(f"🆕 发现新设备: {d_name},正在创建...")
|
print(f"🆕 发现新设备,正在注册: {d_name}")
|
||||||
device = Device(name=d_name, source=item.get('source'), install_site="")
|
device = Device(name=d_name, source=item.get('source'), install_site="")
|
||||||
db.session.add(device)
|
db.session.add(device)
|
||||||
db.session.flush() # 立即获取 ID
|
db.session.flush() # 立即获取 ID
|
||||||
|
stats['new_device'] += 1
|
||||||
|
|
||||||
# 更新设备状态表
|
# 无论之前状态如何,强制更新 Device 表的实时字段
|
||||||
|
# 这样前端首页卡片才能显示最新的 72 条
|
||||||
device.status = item.get('status')
|
device.status = item.get('status')
|
||||||
device.current_value = item.get('value')
|
device.current_value = item.get('value')
|
||||||
device.latest_time = item.get('target_time')
|
device.latest_time = target_date
|
||||||
device.check_time = current_time # 更新检查时间证明爬过了
|
device.check_time = current_time # 证明刚刚检查过
|
||||||
|
|
||||||
f_count = item.get('num_files', 0)
|
|
||||||
device.file_count = f_count
|
device.file_count = f_count
|
||||||
|
device.offset = calculate_offset(target_date)
|
||||||
|
|
||||||
# 计算 offset
|
# JSON 数据合并逻辑
|
||||||
device.offset = calculate_offset(item.get('target_time'))
|
|
||||||
|
|
||||||
# JSON 字段合并逻辑
|
|
||||||
old_json = {}
|
old_json = {}
|
||||||
try:
|
try:
|
||||||
if device.json_data:
|
if device.json_data:
|
||||||
@ -125,30 +135,41 @@ def auto_monitor_job(app):
|
|||||||
new_json = item.get('raw_json', {})
|
new_json = item.get('raw_json', {})
|
||||||
if isinstance(new_json, dict):
|
if isinstance(new_json, dict):
|
||||||
old_json.update(new_json)
|
old_json.update(new_json)
|
||||||
device.json_data = json.dumps(old_json, ensure_ascii=False)
|
|
||||||
|
|
||||||
# ✅ 3. 写入历史记录 (这是数据留存的关键)
|
final_json_str = json.dumps(old_json, ensure_ascii=False)
|
||||||
|
device.json_data = final_json_str
|
||||||
|
|
||||||
|
# =========================================================
|
||||||
|
# ✅ B. 处理 DeviceHistory 表 (纯新增逻辑)
|
||||||
|
# =========================================================
|
||||||
|
# 这里不进行任何查询判断,直接将当次爬取的结果作为一条新历史写入。
|
||||||
|
# 哪怕 target_date 是一样的,create_time 也会不同,从而记录数据变化过程。
|
||||||
|
|
||||||
history_entry = DeviceHistory(
|
history_entry = DeviceHistory(
|
||||||
device_id=device.id,
|
device_id=device.id,
|
||||||
status=device.status,
|
status=item.get('status'),
|
||||||
result_data=device.current_value,
|
result_data=item.get('value'),
|
||||||
data_time=item.get('target_time'), # 文件的时间
|
data_time=target_date, # 数据原本的日期 (如 2026-02-04)
|
||||||
json_data=device.json_data,
|
json_data=final_json_str, # 当时的详细JSON
|
||||||
file_count=f_count,
|
file_count=f_count, # 当时的文件数 (如 72)
|
||||||
create_time=datetime.now() # 记录入库时的系统时间
|
create_time=datetime.now() # 记录这条日志的物理时间 (如 17:00:05)
|
||||||
)
|
)
|
||||||
db.session.add(history_entry)
|
|
||||||
success_count += 1
|
|
||||||
|
|
||||||
# ✅ 4. 显式提交事务
|
db.session.add(history_entry)
|
||||||
|
stats['history_added'] += 1
|
||||||
|
|
||||||
|
# ✅ 3. 提交事务
|
||||||
print(f"💾 正在提交事务到数据库...")
|
print(f"💾 正在提交事务到数据库...")
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
print(f"✅ [成功] 已更新 {success_count} 台设备,并写入历史记录。")
|
|
||||||
|
print(f"✅ [入库完成] 统计结果:")
|
||||||
|
print(f" - 新增设备: {stats['new_device']}")
|
||||||
|
print(f" - 新增历史记录: {stats['history_added']} (所有爬取数据均已追加)")
|
||||||
print(f"{'=' * 50}\n")
|
print(f"{'=' * 50}\n")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
db.session.rollback() # 出错回滚
|
db.session.rollback() # 出错回滚
|
||||||
print(f"❌ [严重异常] 定时任务执行失败: {e}")
|
print(f"❌ [严重异常] 数据写入失败: {e}")
|
||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
@ -186,29 +207,28 @@ def create_app():
|
|||||||
scheduler.init_app(app)
|
scheduler.init_app(app)
|
||||||
scheduler.start()
|
scheduler.start()
|
||||||
|
|
||||||
# ✅ 添加定时任务 (针对常开机环境的最稳配置)
|
# ✅ 添加定时任务
|
||||||
scheduler.add_job(
|
scheduler.add_job(
|
||||||
id='daily_monitor_task',
|
id='daily_monitor_task',
|
||||||
func=auto_monitor_job,
|
func=auto_monitor_job,
|
||||||
args=[app],
|
args=[app],
|
||||||
trigger='cron',
|
trigger='cron',
|
||||||
hour=17, # 每天 17 点
|
hour=17, # 每天 17:00
|
||||||
minute=0,
|
minute=0,
|
||||||
second=0,
|
second=0,
|
||||||
misfire_grace_time=3600, # 允许延迟1小时执行
|
misfire_grace_time=3600,
|
||||||
timezone=pytz.timezone('Asia/Shanghai') # 再次强制指定时区
|
timezone=pytz.timezone('Asia/Shanghai')
|
||||||
)
|
)
|
||||||
|
|
||||||
# 打印一下确认任务已添加
|
|
||||||
print(f"📅 定时任务已锁定: 每天北京时间 17:00 执行")
|
print(f"📅 定时任务已锁定: 每天北京时间 17:00 执行")
|
||||||
|
|
||||||
app.register_blueprint(device_bp)
|
app.register_blueprint(device_bp)
|
||||||
|
|
||||||
# 手动触发测试接口 (保留以备不时之需)
|
# 手动触发测试接口
|
||||||
@app.route('/api/force_run')
|
@app.route('/api/force_run')
|
||||||
def force_run_task():
|
def force_run_task():
|
||||||
auto_monitor_job(app)
|
auto_monitor_job(app)
|
||||||
return jsonify({'code': 200, 'msg': '手动触发成功'})
|
return jsonify({'code': 200, 'msg': '手动触发成功,历史记录已追加'})
|
||||||
|
|
||||||
# 前端路由
|
# 前端路由
|
||||||
@app.route('/')
|
@app.route('/')
|
||||||
@ -236,6 +256,5 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
print("🚀 服务启动中 (24小时常驻模式)...")
|
print("🚀 服务启动中 (24小时常驻模式)...")
|
||||||
|
|
||||||
# ✅ 关键设置: use_reloader=False
|
# use_reloader=False 避免定时任务重复执行
|
||||||
# 防止 Flask 的热重载功能启动两个进程,导致定时任务跑两遍或者被意外杀掉
|
|
||||||
app.run(host='0.0.0.0', port=5000, debug=debug_mode, use_reloader=False)
|
app.run(host='0.0.0.0', port=5000, debug=debug_mode, use_reloader=False)
|
||||||
Reference in New Issue
Block a user