修改自动爬取时间为17点，修改自己动爬取未写入的问题，写入存在线程阻碍导致无法写入进去，以进行修改，测试成功

2026-02-06 10:08:49 +08:00
parent 4cb503089e
commit f167bbc2f2
2 changed files with 104 additions and 112 deletions
--- a/2_1banben/app.py
+++ b/2_1banben/app.py
@ -4,7 +4,7 @@ import json
 import mimetypes
 import logging
 from datetime import datetime
-import pytz  # ✅ 必须引入：用于强制指定北京时间
+import pytz
 from flask import Flask, send_from_directory, jsonify
 from flask_cors import CORS
@ -16,11 +16,13 @@ from flask_apscheduler import APScheduler
 try:
    from config import Config
    from extensions import db
    # 引入两个模型：Device(存最新状态), DeviceHistory(存所有历史日志)
    from models import Device, DeviceHistory
    from services.core import execute_monitor_task
-    # from services.iot_api import sync_iot_data_service # 如不需要可注释
+    try:
        from services.iot_api import sync_iot_data_service
    except ImportError:
        sync_iot_data_service = None
    try:
        from routes.api import api_bp as device_bp
@ -32,217 +34,198 @@ except ImportError as e:
    print(f"❌ 严重错误: 模块导入失败。详细信息: {e}")
    sys.exit(1)
 # ==============================================================================
 # 2. 智能路径配置 (适配 PyInstaller 的 _internal 和 _MEIPASS)
 # ==============================================================================
 RESOURCE_BASE = Config.BASE_DIR
 INSTANCE_PATH = Config.INSTANCE_DIR
-# ==============================================================================
+
-# 2. 路径配置
+def find_static_folder(base_path):
-# ==============================================================================
+    """
-def get_paths():
+    全能路径搜寻逻辑，按优先级查找 web_dist
    """
    # 1. PyInstaller 打包后的特殊路径
    if getattr(sys, 'frozen', False):
-        resource_base = sys._MEIPASS
+        if hasattr(sys, '_MEIPASS'):
-        data_base = os.path.dirname(sys.executable)
+            mei_path = os.path.join(sys._MEIPASS, 'web_dist')
-    else:
+            if os.path.exists(os.path.join(mei_path, 'index.html')):
-        base = os.path.abspath(os.path.dirname(__file__))
+                return mei_path
-        resource_base = base
+
-        data_base = base
+        internal_path = os.path.join(base_path, '_internal', 'web_dist')
-    return resource_base, data_base
+        if os.path.exists(os.path.join(internal_path, 'index.html')):
            return internal_path
    # 2. 当前目录 (exe 同级)
    path = os.path.join(base_path, 'web_dist')
    if os.path.exists(os.path.join(path, 'index.html')):
        return path
    # 3. 开发环境上一级
    parent_path = os.path.join(os.path.dirname(base_path), 'web_dist')
    if os.path.exists(os.path.join(parent_path, 'index.html')):
        return parent_path
    return path
-RESOURCE_BASE, DATA_BASE = get_paths()
+STATIC_FOLDER = find_static_folder(RESOURCE_BASE)
 STATIC_FOLDER = os.path.join(RESOURCE_BASE, 'web_dist')
 INSTANCE_PATH = os.path.join(DATA_BASE, 'instance')
 mimetypes.add_type('application/javascript', '.js')
 mimetypes.add_type('text/css', '.css')
 # ==============================================================================
-# 3. 核心定时任务逻辑 (已修正：历史记录改为“永久新增”模式)
+# 3. 核心定时任务逻辑
 # ==============================================================================
 def auto_monitor_job(app):
    """
    每天 17:00 触发的爬虫任务。
-    逻辑说明：
+    修复：移除不匹配的 create_time 字段，并确保 Session 清理。
    1. Device 表：始终更新为最新状态（覆盖旧值），保证首页看到的是最新的。
    2. DeviceHistory 表：始终新增一条记录（追加模式），保留每一次爬取的历史痕迹。
    """
    # ✅ 强制使用应用上下文，确保数据库连接有效
    with app.app_context():
        # 获取当前北京时间用于日志
        tz = pytz.timezone('Asia/Shanghai')
        now_str = datetime.now(tz).strftime('%Y-%m-%d %H:%M:%S')
        print(f"\n{'=' * 50}")
        print(f"⏰ [定时任务触发] 北京时间: {now_str}")
        print(f"🚀 正在开始执行爬虫逻辑...")
        if not execute_monitor_task:
            print("❌ 错误: 未找到爬虫执行函数 (execute_monitor_task)")
            return
        try:
            # 1. 执行爬取
            task_result = execute_monitor_task()
            if not task_result:
-                print("⚠️ [警告] 爬虫执行完毕，但返回空数据 (None)")
+                print("⚠️ [警告] 爬虫执行完毕，但返回空数据")
                return
            scraped_list = task_result.get('device_list', [])
-            print(f"📦 [数据获取] 爬虫返回了 {len(scraped_list)} 条设备数据")
+            print(f"📦 [数据获取] 爬取到 {len(scraped_list)} 条设备数据")
            current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # 统计计数
            stats = {'new_device': 0, 'history_added': 0}
            # 2. 遍历每一条爬取到的数据
            for item in scraped_list:
                d_name = item.get('name')
                if not d_name: continue
                # 获取关键数据字段
                f_count = item.get('num_files', 0)
-                target_date = item.get('target_time')  # 例如 "2026-02-04"
+                target_date = item.get('target_time')
-                # =========================================================
+                # A. 更新 Device 表
                # ✅ A. 处理 Device 表 (始终更新最新状态)
                # =========================================================
                device = Device.query.filter_by(name=d_name).first()
                if not device:
                    print(f"🆕 发现新设备，正在注册: {d_name}")
                    device = Device(name=d_name, source=item.get('source'), install_site="")
                    db.session.add(device)
-                    db.session.flush()  # 立即获取 ID
+                    db.session.flush()
                    stats['new_device'] += 1
                # 无论之前状态如何，强制更新 Device 表的实时字段
                # 这样前端首页卡片才能显示最新的 72 条
                device.status = item.get('status')
                device.current_value = item.get('value')
                device.latest_time = target_date
-                device.check_time = current_time  # 证明刚刚检查过
+                device.check_time = current_time
                device.file_count = f_count
                device.offset = calculate_offset(target_date)
-                # JSON 数据合并逻辑
+                # JSON 处理
                old_json = {}
                try:
-                    if device.json_data:
+                    if device.json_data: old_json = json.loads(device.json_data)
                        old_json = json.loads(device.json_data)
                except:
                    old_json = {}
                new_json = item.get('raw_json', {})
-                if isinstance(new_json, dict):
+                if isinstance(new_json, dict): old_json.update(new_json)
-                    old_json.update(new_json)
+                device.json_data = json.dumps(old_json, ensure_ascii=False)
                final_json_str = json.dumps(old_json, ensure_ascii=False)
                device.json_data = final_json_str
                # =========================================================
                # ✅ B. 处理 DeviceHistory 表 (纯新增逻辑)
                # =========================================================
                # 这里不进行任何查询判断，直接将当次爬取的结果作为一条新历史写入。
                # 哪怕 target_date 是一样的，create_time 也会不同，从而记录数据变化过程。
                # B. 新增 History 记录
                # [修复点] 移除了 create_time 参数，防止报错
                history_entry = DeviceHistory(
                    device_id=device.id,
                    status=item.get('status'),
                    result_data=item.get('value'),
-                    data_time=target_date,  # 数据原本的日期 (如 2026-02-04)
+                    data_time=target_date,
-                    json_data=final_json_str,  # 当时的详细JSON
+                    json_data=device.json_data,
-                    file_count=f_count,  # 当时的文件数 (如 72)
+                    file_count=f_count
-                    create_time=datetime.now()  # 记录这条日志的物理时间 (如 17:00:05)
+                    # create_time=datetime.now() # 已删除：你的 models.py 中没有定义这个字段
                )
                db.session.add(history_entry)
                stats['history_added'] += 1
-            # ✅ 3. 提交事务
+            db.session.flush()
            print(f"💾 正在提交事务到数据库...")
            db.session.commit()
-
+            print(f"✅ [入库成功] 新增设备: {stats['new_device']} | 新增历史: {stats['history_added']}")
            print(f"✅ [入库完成] 统计结果:")
            print(f"   - 新增设备: {stats['new_device']}")
            print(f"   - 新增历史记录: {stats['history_added']} (所有爬取数据均已追加)")
            print(f"{'=' * 50}\n")
        except Exception as e:
-            db.session.rollback()  # 出错回滚
+            db.session.rollback()
            print(f"❌ [严重异常] 数据写入失败: {e}")
-            import traceback
+        finally:
-            traceback.print_exc()
+            db.session.remove()
            print(f"{'=' * 50}\n")
 # ==============================================================================
 # 4. Flask 应用工厂
 # ==============================================================================
 def create_app():
    # 调试路径
    print(f"🔍 [前端路径锁定] {STATIC_FOLDER}")
    if not os.path.exists(os.path.join(STATIC_FOLDER, 'index.html')):
        print(f"❌ [严重警告] 仍然无法找到 index.html，请检查 PyInstaller 是否将 web_dist 打包进了 _internal 目录。")
    app = Flask(__name__, static_folder=STATIC_FOLDER, instance_path=INSTANCE_PATH)
    CORS(app)
    # 数据库路径配置
    if not os.path.exists(app.instance_path):
        os.makedirs(app.instance_path, exist_ok=True)
    app.config.from_object(Config)
    db_name = 'monitor_data.db'
    db_path = os.path.join(app.instance_path, db_name)
    if sys.platform.startswith('win'):
        app.config['SQLALCHEMY_DATABASE_URI'] = f'sqlite:///{db_path}'
    else:
        app.config['SQLALCHEMY_DATABASE_URI'] = f'sqlite:////{db_path}'
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
    # ✅ APScheduler 配置
    app.config['SCHEDULER_API_ENABLED'] = True
    app.config['SCHEDULER_TIMEZONE'] = "Asia/Shanghai"  # 全局时区设置
    db.init_app(app)
    scheduler = APScheduler()
    scheduler.init_app(app)
    scheduler.start()
    # ✅ 添加定时任务
    scheduler.add_job(
        id='daily_monitor_task',
        func=auto_monitor_job,
        args=[app],
        trigger='cron',
-        hour=17,  # 每天 17:00
+        hour=17,
        minute=0,
        second=0,
        misfire_grace_time=3600,
        timezone=pytz.timezone('Asia/Shanghai')
    )
    print(f"📅 定时任务已锁定: 每天北京时间 17:00 执行")
    app.register_blueprint(device_bp)
    # 手动触发测试接口
    @app.route('/api/force_run')
    def force_run_task():
        auto_monitor_job(app)
        return jsonify({'code': 200, 'msg': '手动触发成功，历史记录已追加'})
    # 前端路由
    @app.route('/')
    def serve_index():
        try:
            return send_from_directory(app.static_folder, 'index.html')
        except Exception:
            return "<h1>错误：找不到前端文件</h1>", 404
    @app.route('/<path:path>')
    def serve_static(path):
        file_path = os.path.join(app.static_folder, path)
        if os.path.exists(file_path):
            return send_from_directory(app.static_folder, path)
        if path.startswith('api'):
-            return jsonify({'code': 404, 'message': 'Not Found'}), 404
+            return jsonify({'code': 404, 'message': 'API endpoint not found'}), 404
        try:
            return send_from_directory(app.static_folder, 'index.html')
        except Exception:
            return "Frontend not found", 404
    with app.app_context():
        db.create_all()
@ -254,7 +237,5 @@ if __name__ == '__main__':
    app = create_app()
    debug_mode = not getattr(sys, 'frozen', False)
-    print("🚀 服务启动中 (24小时常驻模式)...")
+    print(f"🚀 服务启动中... 数据库: {app.config['SQLALCHEMY_DATABASE_URI']}")
    # use_reloader=False 避免定时任务重复执行
    app.run(host='0.0.0.0', port=5000, debug=debug_mode, use_reloader=False)
--- a/2_1banben/config.py
+++ b/2_1banben/config.py
@ -5,7 +5,9 @@ import sys
 def get_base_path():
    """获取运行时路径 (兼容打包后的 exe 和开发环境)"""
    if getattr(sys, 'frozen', False):
        # 打包后：exe 所在目录
        return os.path.dirname(sys.executable)
    # 开发时：当前文件所在目录
    return os.path.dirname(os.path.abspath(__file__))
@ -19,19 +21,28 @@ def get_static_path():
 class Config:
    BASE_DIR = get_base_path()
-    # [新增] 规范化 instance 目录
+    # 规范化 instance 目录
    INSTANCE_DIR = os.path.join(BASE_DIR, 'instance')
-    # [修改] 统一数据库路径到 instance/monitor_data.db
+    # 确保 instance 目录存在（防止第一次运行时报错）
-    # 这样爬虫和IoT数据都存这里，且不会污染根目录
+    if not os.path.exists(INSTANCE_DIR):
-    SQLALCHEMY_DATABASE_URI = f'sqlite:///{os.path.join(INSTANCE_DIR, "monitor_data.db")}'
+        try:
            os.makedirs(INSTANCE_DIR)
        except Exception:
            pass
    # [修改] 绝对路径拼接，并强制将 Windows 的 \ 转换为 /，避免 SQLite URI 报错
    # 最终结果类似: sqlite:///D:/project/instance/monitor_data.db
    _db_path = os.path.join(INSTANCE_DIR, "monitor_data.db").replace('\\', '/')
    SQLALCHEMY_DATABASE_URI = f'sqlite:///{_db_path}'
    SQLALCHEMY_TRACK_MODIFICATIONS = False
    # --- 定时任务配置 ---
    SCHEDULER_API_ENABLED = True
    SCHEDULER_TIMEZONE = "Asia/Shanghai"
-    # --- 爬虫配置 (原有) ---
+    # --- 爬虫配置 ---
    CRAWLER_CONFIG = {
        "106": {
            "base_url": "http://106.75.72.40:7500/api/proxy/tcp",
@ -44,15 +55,15 @@ class Config:
        }
    }
-    # --- [新增] IoT 物联网卡接口配置 ---
+    # --- IoT 物联网卡接口配置 ---
    # 这里的配置会被 services/iot_api.py 读取
    IOT_BASE_URL = "https://iot.huskyiot.cn"
-    IOT_APP_ID = "44aQHTpx"  # 你的 AppID
+    IOT_APP_ID = "44aQHTpx"
-    IOT_SECRET = "26833abf8786167a5cff5355cfc249981985124a"  # 你的 Secret
+    IOT_SECRET = "26833abf8786167a5cff5355cfc249981985124a"
-    IOT_USERNAME = "yrsy"  # 登录账号
+    IOT_USERNAME = "yrsy"
-    IOT_PASSWORD = "123456789"  # 登录密码
+    IOT_PASSWORD = "123456789"
    # 接口路径
    IOT_URL_LOGIN = "/iot-api/system/auth/v1/get/token"
    IOT_URL_PAGE = "/iot-api/platform/v1/card-info/query/page"
    IOT_URL_DETAIL = "/iot-api/platform/v1/card-info/query/batch-card-detail"
    # [Debug] 打印路径确认
    print(f"配置文件已加载，数据库路径: {SQLALCHEMY_DATABASE_URI}")