feat: 添加以图搜图功能(CLIP ONNX + pgvector)+ Dify会话修复 + 版本升至V3.30
This commit is contained in:
@ -90,6 +90,17 @@ def create_app():
|
||||
except ImportError as e:
|
||||
print(f"❌ 错误: Upload 模块导入失败: {e}")
|
||||
|
||||
# -----------------------------------------------------
|
||||
# 2.4 注册以图搜图模块 (Image Search)
|
||||
# -----------------------------------------------------
|
||||
try:
|
||||
from app.api.v1.common.image_search import image_search_bp
|
||||
app.register_blueprint(image_search_bp, url_prefix='/api/v1/common')
|
||||
app.register_blueprint(image_search_bp, url_prefix='/api/common', name='image_search_legacy')
|
||||
print("✅ Image Search 模块注册成功")
|
||||
except ImportError as e:
|
||||
print(f"❌ 错误: Image Search 模块导入失败: {e}")
|
||||
|
||||
# -----------------------------------------------------
|
||||
# 2.4 注册业务操作模块 (Transactions - 借还/维修/报废)
|
||||
# -----------------------------------------------------
|
||||
|
||||
126
inventory-backend/app/api/v1/common/image_search.py
Normal file
126
inventory-backend/app/api/v1/common/image_search.py
Normal file
@ -0,0 +1,126 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
以图搜图 API - CLIP Vision Embedding + pgvector 余弦距离检索
|
||||
"""
|
||||
|
||||
import os
|
||||
import uuid
|
||||
import json
|
||||
from flask import Blueprint, request, jsonify
|
||||
from sqlalchemy import text
|
||||
from app.extensions import db
|
||||
from app.utils.ai_vision import load_clip_model, get_image_embedding
|
||||
|
||||
# 注册蓝图
|
||||
image_search_bp = Blueprint('image_search', __name__)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# POST /api/v1/common/image-search
|
||||
# 以图搜图:上传图片 → CLIP embedding → pgvector 余弦相似度检索
|
||||
# ============================================================================
|
||||
|
||||
# File extensions accepted for the uploaded query image.
_ALLOWED_IMAGE_EXTENSIONS = frozenset({'png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'})


def _first_image_url(product_image):
    """Return the first image URL stored in a ``product_image`` column value.

    The value may be empty, a JSON-encoded list of URLs, a JSON-encoded
    string, an already-decoded list, or a plain (non-JSON) URL string.
    Anything that cannot be interpreted as a list or string is returned as
    ``str(product_image)``.
    """
    if not product_image:
        return ""

    if isinstance(product_image, (list, tuple)):
        images = product_image
    else:
        try:
            images = json.loads(product_image)
        except (TypeError, ValueError):
            # Not JSON at all: treat the raw value as the URL itself.
            return str(product_image)

    if isinstance(images, str):
        # A JSON-encoded single URL; indexing it would yield one character.
        return images
    if isinstance(images, (list, tuple)):
        return images[0] if images else ""
    return str(product_image)


@image_search_bp.route('/image-search', methods=['POST'])
def image_search():
    """Search products by image similarity.

    Expects a multipart upload under the ``file`` field. The image is saved
    to a temporary file, embedded via ``get_image_embedding``, and the
    embedding is compared against ``material_base.img_embedding`` with the
    pgvector ``<=>`` operator. The five closest rows are returned.

    Returns:
        A JSON response ``{"code", "msg", "data"}``; HTTP 400 for a missing
        or unsupported upload, HTTP 500 when embedding extraction or the
        database query fails.
    """
    # ---------------------------------------------------------
    # 1. Validate the upload
    # ---------------------------------------------------------
    upload = request.files.get('file')
    if upload is None:
        return jsonify({"code": 400, "msg": "未找到图片文件"}), 400

    # ``filename`` may be None as well as empty, so test truthiness.
    if not upload.filename:
        return jsonify({"code": 400, "msg": "未选择文件"}), 400

    # ---------------------------------------------------------
    # 2. Save to a temporary file under a server-generated name
    # ---------------------------------------------------------
    # Require an actual "." so that a bare name such as "png" is rejected.
    _, dot, ext = upload.filename.rpartition('.')
    ext = ext.lower()
    if not dot or ext not in _ALLOWED_IMAGE_EXTENSIONS:
        return jsonify({"code": 400, "msg": "不支持的图片格式"}), 400

    tmp_filename = f"{uuid.uuid4().hex}.{ext}"
    tmp_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'uploads')
    os.makedirs(tmp_dir, exist_ok=True)
    tmp_path = os.path.join(tmp_dir, tmp_filename)

    try:
        upload.save(tmp_path)
        print(f"💾 [ImageSearch] 临时文件已保存: {tmp_path}")

        # -----------------------------------------------------
        # 3. Extract the CLIP embedding
        # -----------------------------------------------------
        load_clip_model()
        embedding = get_image_embedding(tmp_path)
        print(f"✅ [ImageSearch] Embedding 提取成功,维度: {len(embedding)}")

    except Exception as e:
        print(f"❌ [ImageSearch] 图像处理失败: {e}")
        return jsonify({"code": 500, "msg": f"图像处理失败: {str(e)}"}), 500

    finally:
        # -----------------------------------------------------
        # 4. Always remove the temporary file, success or not
        # -----------------------------------------------------
        try:
            os.remove(tmp_path)
            print(f"🗑️ [ImageSearch] 临时文件已清理: {tmp_path}")
        except FileNotFoundError:
            # save() failed before the file was created; nothing to clean up.
            pass
        except Exception as e:
            print(f"⚠️ [ImageSearch] 临时文件删除失败: {e}")

    # ---------------------------------------------------------
    # 5. Cosine-similarity lookup through pgvector
    # ---------------------------------------------------------
    try:
        # pgvector text format: '[0.1,0.2,...]'
        query_vector_str = '[' + ','.join(str(v) for v in embedding) + ']'

        # The explicit CAST keeps the query valid when the driver sends the
        # parameter as a typed text value instead of an untyped literal.
        sql = text("""
            SELECT id, name, spec_model, product_image,
                   (1 - (img_embedding <=> CAST(:query_vector AS vector))) AS similarity
            FROM material_base
            WHERE img_embedding IS NOT NULL
            ORDER BY img_embedding <=> CAST(:query_vector AS vector)
            LIMIT 5
        """)

        rows = db.session.execute(sql, {"query_vector": query_vector_str}).fetchall()

        results = []
        for row in rows:
            results.append({
                "product_id": row[0],
                "product_name": row[1] or "",
                "spec_model": row[2] or "",
                "image_url": _first_image_url(row[3]),
                "similarity": round(float(row[4]), 4),
            })

        print(f"✅ [ImageSearch] 检索完成,命中 {len(results)} 条结果")
        return jsonify({
            "code": 200,
            "msg": "检索成功",
            "data": results
        })

    except Exception as e:
        print(f"❌ [ImageSearch] 数据库检索失败: {e}")
        return jsonify({"code": 500, "msg": f"检索失败: {str(e)}"}), 500
|
||||
@ -11,6 +11,8 @@ Dify 智能客服权限服务层
|
||||
- 跨模块越权查询:直接阻断,返回角色专属的错误信息给大模型
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from flask import g, current_app
|
||||
from flask_jwt_extended import decode_token
|
||||
from app.models.system import SysRolePermission
|
||||
@ -185,7 +187,7 @@ class DifyPermissionService:
|
||||
返回:
|
||||
{
|
||||
'blocked': bool, # 是否被拦截
|
||||
'message': str | None, # AI 应返回给用户的错误信息(如果有)
|
||||
'message': Optional[str], # AI 应返回给用户的错误信息(如果有)
|
||||
}
|
||||
"""
|
||||
if DifyPermissionService.is_super_admin(role):
|
||||
|
||||
@ -20,6 +20,8 @@ import logging
|
||||
from threading import Thread
|
||||
from datetime import datetime
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
||||
|
||||
@ -346,7 +348,7 @@ def get_task_status(task_id: str) -> dict:
|
||||
# 获取导出文件路径(供下载接口调用)
|
||||
# =============================================================================
|
||||
|
||||
def get_export_filepath(task_id: str) -> str | None:
|
||||
def get_export_filepath(task_id: str) -> Optional[str]:
|
||||
"""
|
||||
根据 task_id 返回已生成文件的完整路径。
|
||||
未完成或不存在返回 None。
|
||||
|
||||
132
inventory-backend/app/utils/ai_vision.py
Normal file
132
inventory-backend/app/utils/ai_vision.py
Normal file
@ -0,0 +1,132 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
AI Vision 模块 - CLIP Vision Encoder ONNX 推理
|
||||
"""
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import onnxruntime as ort
|
||||
|
||||
# ============================================================================
|
||||
# 全局模型单例(项目启动时加载一次)
|
||||
# ============================================================================
|
||||
|
||||
# Location of the ONNX model file, built relative to this module's directory
# (two levels up, then models/clip_vision.onnx).
MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'models', 'clip_vision.onnx')
|
||||
|
||||
# Session options: enable all ONNX Runtime graph optimizations.
|
||||
_session_options = ort.SessionOptions()
|
||||
_session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
|
||||
|
||||
# Cached inference session; stays None until load_clip_model() creates it.
ort_session: ort.InferenceSession = None
|
||||
|
||||
|
||||
def load_clip_model():
    """Return the shared CLIP vision inference session, creating it if needed.

    The session is stored in the module-level ``ort_session`` so the model
    file is only loaded on the first call; later calls return the cached
    session.

    Raises:
        FileNotFoundError: if no file exists at ``MODEL_PATH``.
    """
    global ort_session

    if ort_session is None:
        if not os.path.exists(MODEL_PATH):
            raise FileNotFoundError(f"CLIP Vision 模型未找到: {MODEL_PATH}")

        ort_session = ort.InferenceSession(
            MODEL_PATH,
            sess_options=_session_options,
            providers=['CPUExecutionProvider'],
        )
        print(f"✅ [AI Vision] CLIP 模型加载成功: {MODEL_PATH}")

    return ort_session
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# CLIP 预处理常量
|
||||
# ============================================================================
|
||||
|
||||
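# ImageNet mean/std normalization constants.
# NOTE(review): the reference OpenAI CLIP preprocessing uses different
# statistics (mean ~ [0.4815, 0.4578, 0.4082], std ~ [0.2686, 0.2613, 0.2758]).
# Confirm which values the exported ONNX model and the stored embeddings
# were produced with before changing these.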
|
||||
IMAGENET_MEAN = [0.485, 0.456, 0.406]
|
||||
IMAGENET_STD = [0.229, 0.224, 0.225]
|
||||
|
||||
# 模型输入尺寸
|
||||
INPUT_SIZE = 224
|
||||
|
||||
|
||||
def _center_crop_and_resize(image: Image.Image) -> Image.Image:
    """Scale the shorter side to ``INPUT_SIZE`` and crop the central square.

    The image is resized with bilinear resampling so that its shorter side
    equals ``INPUT_SIZE`` (aspect ratio preserved, longer side truncated to
    an int), then an ``INPUT_SIZE`` x ``INPUT_SIZE`` window is cut from the
    centre.
    """
    width, height = image.size

    # Target size after scaling: the shorter side becomes INPUT_SIZE.
    if width < height:
        scaled = (INPUT_SIZE, int(height * INPUT_SIZE / width))
    else:
        scaled = (int(width * INPUT_SIZE / height), INPUT_SIZE)

    resized = image.resize(scaled, Image.BILINEAR)

    # Top-left corner of the centred crop window.
    x0 = (scaled[0] - INPUT_SIZE) // 2
    y0 = (scaled[1] - INPUT_SIZE) // 2

    return resized.crop((x0, y0, x0 + INPUT_SIZE, y0 + INPUT_SIZE))
|
||||
|
||||
|
||||
def _normalize(image_np: np.ndarray) -> np.ndarray:
    """Convert an HWC image array into a normalized CHW float32 array.

    Args:
        image_np: array of shape (H, W, C) with values in [0, 255]; the
            channel count must match the length of ``IMAGENET_MEAN``.

    Returns:
        Array of shape (C, H, W), dtype float32. Pixels are first scaled to
        [0, 1] and then standardized per channel as ``(x - mean) / std``, so
        the result is generally NOT confined to [0, 1].
    """
    # HWC -> CHW, scaled to [0, 1]
    chw = image_np.transpose(2, 0, 1).astype(np.float32) / 255.0

    # Per-channel statistics shaped (C, 1, 1) so they broadcast over H and W.
    channel_mean = np.asarray(IMAGENET_MEAN, dtype=np.float32).reshape(-1, 1, 1)
    channel_std = np.asarray(IMAGENET_STD, dtype=np.float32).reshape(-1, 1, 1)

    return (chw - channel_mean) / channel_std
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# 主函数:提取图像 embedding
|
||||
# ============================================================================
|
||||
|
||||
def get_image_embedding(image_path: str) -> list:
    """Extract the CLIP embedding vector for an image file.

    Args:
        image_path: path to a local image file. The value is passed straight
            to ``PIL.Image.open``, so remote URLs are not supported.

    Returns:
        list: the first row of the model's first output as a flat list of
        floats (presumably 512-dimensional; this depends on the exported
        model).

    Raises:
        ValueError: if the image cannot be opened or decoded.
        FileNotFoundError: if the model has not been loaded yet and the
            model file is missing (raised by ``load_clip_model``).
    """
    session = ort_session if ort_session is not None else load_clip_model()

    # Decode inside a context manager so the file handle is released right
    # away; callers may delete the file immediately after this call returns.
    try:
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
    except Exception as e:
        raise ValueError(f"图像加载失败: {image_path}, 错误: {e}") from e

    # Resize + centre crop to the model's input size
    image = _center_crop_and_resize(image)

    # Per-channel normalization, HWC -> CHW
    input_data = _normalize(np.array(image))

    # Add the batch dimension: (C, H, W) -> (1, C, H, W)
    input_data = np.expand_dims(input_data, axis=0)

    # NOTE(review): assumes the exported model's input tensor is named
    # 'images'; confirm against the ONNX file.
    outputs = session.run(None, {'images': input_data.astype(np.float32)})

    # The first output is expected to be shaped (1, D); take row 0 as a list.
    return outputs[0][0].tolist()
|
||||
Reference in New Issue
Block a user