feat: 添加以图搜图功能（CLIP ONNX + pgvector）+ Dify会话修复 + 版本升至V3.30

2026-05-21 14:09:57 +08:00
parent 621431dcb9
commit 1a7c06f197
11 changed files with 804 additions and 25 deletions
--- a/inventory-backend/app/init.py
+++ b/inventory-backend/app/init.py
@ -90,6 +90,17 @@ def create_app():
    except ImportError as e:
        print(f"❌ 错误: Upload 模块导入失败: {e}")

+    # -----------------------------------------------------
+    # 2.4 注册以图搜图模块 (Image Search)
+    # -----------------------------------------------------
+    try:
+        from app.api.v1.common.image_search import image_search_bp
+        app.register_blueprint(image_search_bp, url_prefix='/api/v1/common')
+        app.register_blueprint(image_search_bp, url_prefix='/api/common', name='image_search_legacy')
+        print("✅ Image Search 模块注册成功")
+    except ImportError as e:
+        print(f"❌ 错误: Image Search 模块导入失败: {e}")
+
    # -----------------------------------------------------
    # 2.4 注册业务操作模块 (Transactions - 借还/维修/报废)
    # -----------------------------------------------------
--- a/inventory-backend/app/api/v1/common/image_search.py
+++ b/inventory-backend/app/api/v1/common/image_search.py
@ -0,0 +1,126 @@
+# -*- coding: utf-8 -*-
+"""
+以图搜图 API - CLIP Vision Embedding + pgvector 余弦距离检索
+"""
+
+import os
+import uuid
+import json
+from flask import Blueprint, request, jsonify
+from sqlalchemy import text
+from app.extensions import db
+from app.utils.ai_vision import load_clip_model, get_image_embedding
+
+# Blueprint that carries the image-search route defined in this module.
+image_search_bp = Blueprint('image_search', __name__)
+
+
+# ============================================================================
+# POST /api/v1/common/image-search
+# Image search: uploaded picture -> CLIP embedding -> pgvector cosine lookup
+# ============================================================================
+
+# File extensions accepted for the uploaded query image.
+_ALLOWED_IMAGE_EXTENSIONS = frozenset({'png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'})
+
+
+def _first_image_url(product_image):
+    """Return the first image URL held in a ``product_image`` column value.
+
+    The value is normally a JSON-encoded list of URLs. A value that is not
+    valid JSON (for example a bare URL) is returned as-is, a JSON string is
+    returned whole rather than by its first character, and a value the driver
+    has already decoded to a list is handled like a parsed list.
+
+    Args:
+        product_image: Raw column value; may be ``None`` or empty.
+
+    Returns:
+        The first URL, or ``""`` when there is nothing to show.
+    """
+    if not product_image:
+        return ""
+
+    parsed = product_image
+    if isinstance(product_image, (str, bytes, bytearray)):
+        try:
+            parsed = json.loads(product_image)
+        except ValueError:
+            # Not JSON at all: fall back to the raw column text.
+            return str(product_image)
+
+    if not parsed:
+        # JSON null, empty list, empty string, ...
+        return ""
+    if isinstance(parsed, (list, tuple)):
+        return parsed[0]
+    if isinstance(parsed, str):
+        return parsed
+    # Unexpected shape (object, number, ...): keep the raw value visible.
+    return str(product_image)
+
+
+@image_search_bp.route('/image-search', methods=['POST'])
+def image_search():
+    """Find the stored products whose image is closest to an uploaded picture.
+
+    Expects a multipart upload in the ``file`` field. The picture is written
+    to a temporary file, embedded with the CLIP vision model, and the
+    embedding is compared against ``material_base.img_embedding`` with the
+    pgvector ``<=>`` operator; the five nearest rows are returned.
+
+    Returns:
+        A JSON body ``{"code", "msg", "data"}``. ``data`` is a list of
+        ``product_id`` / ``product_name`` / ``spec_model`` / ``image_url`` /
+        ``similarity`` objects, where ``similarity`` is ``1 - distance``
+        rounded to four decimals. Validation problems answer with HTTP 400,
+        image-processing or database failures with HTTP 500.
+    """
+    # ---------------------------------------------------------
+    # 1. Validate the upload
+    # ---------------------------------------------------------
+    file = request.files.get('file')
+    if file is None:
+        return jsonify({"code": 400, "msg": "未找到图片文件"}), 400
+
+    if not file.filename:
+        return jsonify({"code": 400, "msg": "未选择文件"}), 400
+
+    # Require a real "name.ext" shape: without the dot check a file literally
+    # named "png" would be treated as having a valid extension.
+    _, dot, ext = file.filename.rpartition('.')
+    ext = ext.lower()
+    if not dot or ext not in _ALLOWED_IMAGE_EXTENSIONS:
+        return jsonify({"code": 400, "msg": "不支持的图片格式"}), 400
+
+    # ---------------------------------------------------------
+    # 2. Store the upload under a random name (client filename is never used)
+    # ---------------------------------------------------------
+    tmp_filename = f"{uuid.uuid4().hex}.{ext}"
+    tmp_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'uploads')
+    os.makedirs(tmp_dir, exist_ok=True)
+    tmp_path = os.path.join(tmp_dir, tmp_filename)
+
+    try:
+        file.save(tmp_path)
+        print(f"💾 [ImageSearch] 临时文件已保存: {tmp_path}")
+
+        # ---------------------------------------------------------
+        # 3. Extract the CLIP embedding
+        # ---------------------------------------------------------
+        load_clip_model()
+        embedding = get_image_embedding(tmp_path)
+        print(f"✅ [ImageSearch] Embedding 提取成功，维度: {len(embedding)}")
+
+    except Exception as e:
+        # NOTE(review): the exception text is echoed to the client; consider
+        # returning a generic message if internal details must not leak.
+        print(f"❌ [ImageSearch] 图像处理失败: {e}")
+        return jsonify({"code": 500, "msg": f"图像处理失败: {str(e)}"}), 500
+
+    finally:
+        # ---------------------------------------------------------
+        # 4. Always remove the temporary file, on success and on failure
+        # ---------------------------------------------------------
+        try:
+            os.remove(tmp_path)
+            print(f"🗑️ [ImageSearch] 临时文件已清理: {tmp_path}")
+        except FileNotFoundError:
+            # save() failed before the file was created; nothing to clean up.
+            pass
+        except OSError as e:
+            print(f"⚠️ [ImageSearch] 临时文件删除失败: {e}")
+
+    # ---------------------------------------------------------
+    # 5. pgvector nearest-neighbour lookup
+    # ---------------------------------------------------------
+    try:
+        # pgvector text format for a vector literal: '[0.1,0.2,...]'
+        query_vector_str = '[' + ','.join(str(v) for v in embedding) + ']'
+
+        sql = text("""
+            SELECT id, name, spec_model, product_image,
+                   (1 - (img_embedding <=> :query_vector)) AS similarity
+            FROM material_base
+            WHERE img_embedding IS NOT NULL
+            ORDER BY img_embedding <=> :query_vector
+            LIMIT 5
+        """)
+
+        rows = db.session.execute(sql, {"query_vector": query_vector_str}).fetchall()
+
+        results = []
+        for product_id, name, spec_model, product_image, similarity in rows:
+            results.append({
+                "product_id": product_id,
+                "product_name": name or "",
+                "spec_model": spec_model or "",
+                "image_url": _first_image_url(product_image),
+                "similarity": round(float(similarity), 4)
+            })
+
+        print(f"✅ [ImageSearch] 检索完成，命中 {len(results)} 条结果")
+        return jsonify({
+            "code": 200,
+            "msg": "检索成功",
+            "data": results
+        })
+
+    except Exception as e:
+        print(f"❌ [ImageSearch] 数据库检索失败: {e}")
+        return jsonify({"code": 500, "msg": f"检索失败: {str(e)}"}), 500
--- a/inventory-backend/app/services/dify_permission_service.py
+++ b/inventory-backend/app/services/dify_permission_service.py
@ -11,6 +11,8 @@ Dify 智能客服权限服务层
   - 跨模块越权查询：直接阻断，返回角色专属的错误信息给大模型
 """

+from typing import Optional
+
 from flask import g, current_app
 from flask_jwt_extended import decode_token
 from app.models.system import SysRolePermission
@ -185,7 +187,7 @@ class DifyPermissionService:
        返回:
            {
                'blocked': bool,           # 是否被拦截
-                'message': str | None,     # AI 应返回给用户的错误信息（如果有）
+                'message': Optional[str],     # AI 应返回给用户的错误信息（如果有）
            }
        """
        if DifyPermissionService.is_super_admin(role):
--- a/inventory-backend/app/services/export_service/excel_task.py
+++ b/inventory-backend/app/services/export_service/excel_task.py
@ -20,6 +20,8 @@ import logging
 from threading import Thread
 from datetime import datetime

+from typing import Optional
+
 from openpyxl import Workbook
 from openpyxl.styles import Font, PatternFill, Alignment, Border, Side

@ -346,7 +348,7 @@ def get_task_status(task_id: str) -> dict:
 # 获取导出文件路径（供下载接口调用）
 # =============================================================================

-def get_export_filepath(task_id: str) -> str | None:
+def get_export_filepath(task_id: str) -> Optional[str]:
    """
    根据 task_id 返回已生成文件的完整路径。
    未完成或不存在返回 None。
--- a/inventory-backend/app/utils/ai_vision.py
+++ b/inventory-backend/app/utils/ai_vision.py
@ -0,0 +1,132 @@
+# -*- coding: utf-8 -*-
+"""
+AI Vision 模块 - CLIP Vision Encoder ONNX 推理
+"""
+
+import os
+import numpy as np
+from PIL import Image
+import onnxruntime as ort
+
+# ============================================================================
+# Module-level model singleton (created on first use by load_clip_model())
+# ============================================================================
+
+# Location of the exported model file, resolved relative to this module.
+MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'models', 'clip_vision.onnx')
+
+# Session options: enable every ONNX Runtime graph optimization level.
+# The CPU execution provider itself is selected in load_clip_model().
+_session_options = ort.SessionOptions()
+_session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+
+# Shared inference session; stays None until load_clip_model() assigns it.
+ort_session: ort.InferenceSession = None
+
+
+def load_clip_model():
+    """Return the shared CLIP vision ONNX session, creating it on first call.
+
+    Later calls reuse the session stored in the module-level ``ort_session``.
+
+    Returns:
+        The ``onnxruntime.InferenceSession`` bound to ``MODEL_PATH``.
+
+    Raises:
+        FileNotFoundError: If no file exists at ``MODEL_PATH``.
+    """
+    global ort_session
+
+    if ort_session is None:
+        if not os.path.exists(MODEL_PATH):
+            raise FileNotFoundError(f"CLIP Vision 模型未找到: {MODEL_PATH}")
+
+        ort_session = ort.InferenceSession(
+            MODEL_PATH,
+            sess_options=_session_options,
+            providers=['CPUExecutionProvider'],
+        )
+        print(f"✅ [AI Vision] CLIP 模型加载成功: {MODEL_PATH}")
+
+    return ort_session
+
+
+# ============================================================================
+# Preprocessing constants
+# ============================================================================
+
+# Per-channel (R, G, B) mean / std applied by _normalize() after pixels are
+# scaled to [0, 1].
+# NOTE(review): an earlier comment labelled these "CLIP official", but the
+# values are the ImageNet statistics; OpenAI's CLIP preprocessing publishes
+# different numbers (mean ~0.4815/0.4578/0.4082, std ~0.2686/0.2613/0.2758).
+# Confirm which set the exported ONNX model and the stored img_embedding
+# vectors were produced with before changing anything here.
+IMAGENET_MEAN = [0.485, 0.456, 0.406]
+IMAGENET_STD = [0.229, 0.224, 0.225]
+
+# Side length, in pixels, of the square produced by _center_crop_and_resize().
+INPUT_SIZE = 224
+
+
+def _center_crop_and_resize(image: Image.Image) -> Image.Image:
+    """Scale the shorter side to ``INPUT_SIZE`` and cut out the central square.
+
+    Args:
+        image: Source picture of any size.
+
+    Returns:
+        An ``INPUT_SIZE`` x ``INPUT_SIZE`` image taken from the middle of the
+        bilinearly resized picture.
+    """
+    src_w, src_h = image.size
+
+    # Pin the shorter side to INPUT_SIZE; the longer side keeps the aspect
+    # ratio, truncated to a whole pixel.
+    if src_w >= src_h:
+        scaled_w, scaled_h = int(src_w * INPUT_SIZE / src_h), INPUT_SIZE
+    else:
+        scaled_w, scaled_h = INPUT_SIZE, int(src_h * INPUT_SIZE / src_w)
+
+    resized = image.resize((scaled_w, scaled_h), Image.BILINEAR)
+
+    # Top-left corner of the centred crop window.
+    x0 = (scaled_w - INPUT_SIZE) // 2
+    y0 = (scaled_h - INPUT_SIZE) // 2
+
+    return resized.crop((x0, y0, x0 + INPUT_SIZE, y0 + INPUT_SIZE))
+
+
+def _normalize(image_np: np.ndarray) -> np.ndarray:
+    """Turn an HWC pixel array into a standardized CHW float32 array.
+
+    Pixels are divided by 255 and each channel is then shifted by its entry
+    in ``IMAGENET_MEAN`` and divided by its entry in ``IMAGENET_STD``, so the
+    result is centred around zero rather than confined to [0, 1].
+
+    Args:
+        image_np: Array of shape (H, W, C); the caller passes an RGB image.
+
+    Returns:
+        A new float32 array of shape (C, H, W); the input is not modified.
+    """
+    # astype() copies, so the in-place arithmetic below never touches the input.
+    chw = np.transpose(image_np, (2, 0, 1)).astype(np.float32)
+    chw /= 255.0
+
+    for channel, (mean, std) in enumerate(zip(IMAGENET_MEAN, IMAGENET_STD)):
+        plane = chw[channel]  # view into chw
+        plane -= mean
+        plane /= std
+
+    return chw
+
+
+# ============================================================================
+# 主函数：提取图像 embedding
+# ============================================================================
+
+def get_image_embedding(image_path: str) -> list:
+    """Compute the CLIP vision embedding of an image file.
+
+    Args:
+        image_path: Path of a local image file that Pillow can open. Remote
+            URLs are not fetched.
+
+    Returns:
+        The first row of the model's first output as a flat list of floats
+        (512 values according to the original documentation; the length is
+        not checked here).
+
+    Raises:
+        ValueError: If the image cannot be opened or decoded.
+        FileNotFoundError: Propagated from ``load_clip_model()`` when the
+            model file is missing.
+    """
+    # Returns the cached session, loading the model on first use.
+    session = load_clip_model()
+
+    # Decode inside a context manager so the file handle is released before
+    # the caller deletes the file (open handles block deletion on Windows).
+    try:
+        with Image.open(image_path) as source:
+            image = source.convert('RGB')
+    except Exception as e:
+        raise ValueError(f"图像加载失败: {image_path}, 错误: {e}") from e
+
+    # Resize + centre crop to the model's square input.
+    image = _center_crop_and_resize(image)
+
+    # HWC uint8 -> standardized CHW float32.
+    input_data = _normalize(np.array(image))
+
+    # Add the batch dimension: (C, H, W) -> (1, C, H, W).
+    input_data = np.expand_dims(input_data, axis=0)
+
+    # The exported model is fed through its input named 'images'.
+    outputs = session.run(None, {'images': input_data.astype(np.float32)})
+
+    # First output, first (only) batch item, flattened to plain Python floats.
+    return outputs[0][0].tolist()