feat: 新增物料/入库单实时 CLIP 向量提取(新建+更新),修复 I/O 延迟和路径解析静默失败

This commit is contained in:
DXC
2026-05-25 10:04:32 +08:00
parent ee9b19e72a
commit 1da4b454cd
10 changed files with 100 additions and 3 deletions

View File

@ -1,5 +1,6 @@
# app/models/base.py # app/models/base.py
from app.extensions import db from app.extensions import db
from pgvector.sqlalchemy import Vector
import json import json
@ -34,6 +35,9 @@ class MaterialBase(db.Model):
# 强制质检标记(采购入库时必须上传检测报告) # 强制质检标记(采购入库时必须上传检测报告)
is_inspection_required = db.Column(db.Boolean, default=False, comment='是否强制要求质检') is_inspection_required = db.Column(db.Boolean, default=False, comment='是否强制要求质检')
# CLIP 视觉向量(用于以图搜图)
img_embedding = db.Column(Vector(512), nullable=True)
# ============================================================ # ============================================================
# 关联关系区域 # 关联关系区域
# ============================================================ # ============================================================

View File

@ -1,5 +1,6 @@
# inventory-backend/app/models/inbound/buy.py # inventory-backend/app/models/inbound/buy.py
from app.extensions import db from app.extensions import db
from pgvector.sqlalchemy import Vector
import json import json
# 显式导入 MaterialBase 以防 relationship 找不到引用 # 显式导入 MaterialBase 以防 relationship 找不到引用
from app.models.base import MaterialBase from app.models.base import MaterialBase
@ -55,6 +56,9 @@ class StockBuy(db.Model):
# 全局打印流水号 # 全局打印流水号
global_print_id = db.Column(db.Integer) global_print_id = db.Column(db.Integer)
# CLIP 视觉向量(用于以图搜图)
arrival_image_embedding = db.Column(Vector(512), nullable=True)
# 关系定义 # 关系定义
base = db.relationship('MaterialBase', back_populates='stock_buys') base = db.relationship('MaterialBase', back_populates='stock_buys')

View File

@ -1,5 +1,6 @@
# app/models/inbound/product.py # app/models/inbound/product.py
from app.extensions import db from app.extensions import db
from pgvector.sqlalchemy import Vector
import json import json
from app.models.base import MaterialBase from app.models.base import MaterialBase
@ -58,6 +59,9 @@ class StockProduct(db.Model):
# 全局打印流水号 # 全局打印流水号
global_print_id = db.Column(db.Integer) global_print_id = db.Column(db.Integer)
# CLIP 视觉向量(用于以图搜图)
arrival_image_embedding = db.Column(Vector(512), nullable=True)
# 关系定义 # 关系定义
base = db.relationship('MaterialBase', back_populates='stock_products') base = db.relationship('MaterialBase', back_populates='stock_products')

View File

@ -1,5 +1,6 @@
# app/models/inbound/semi.py # app/models/inbound/semi.py
from app.extensions import db from app.extensions import db
from pgvector.sqlalchemy import Vector
import json import json
from app.models.base import MaterialBase from app.models.base import MaterialBase
@ -56,6 +57,9 @@ class StockSemi(db.Model):
# 全局打印流水号 # 全局打印流水号
global_print_id = db.Column(db.Integer) global_print_id = db.Column(db.Integer)
# CLIP 视觉向量(用于以图搜图)
arrival_image_embedding = db.Column(Vector(512), nullable=True)
# 关系定义 # 关系定义
base = db.relationship('MaterialBase', back_populates='stock_semis') base = db.relationship('MaterialBase', back_populates='stock_semis')

View File

@ -12,6 +12,7 @@ import traceback
import json import json
import io import io
import datetime import datetime
from app.utils.ai_vision import extract_and_embed
# 需要 pip install openpyxl # 需要 pip install openpyxl
from openpyxl import Workbook from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, Border, Side, PatternFill from openpyxl.styles import Font, Alignment, Border, Side, PatternFill
@ -555,7 +556,9 @@ class MaterialBaseService:
product_image=json.dumps(data.get('generalImage', [])), product_image=json.dumps(data.get('generalImage', [])),
is_enabled=is_enabled_val is_enabled=is_enabled_val
) )
# 实时提取产品图向量(失败不影响业务)
if new_material.product_image:
new_material.img_embedding = extract_and_embed(new_material.product_image)
db.session.add(new_material) db.session.add(new_material)
db.session.commit() db.session.commit()
return new_material return new_material
@ -587,6 +590,10 @@ class MaterialBaseService:
if 'generalImage' in data: if 'generalImage' in data:
material.product_image = json.dumps(data['generalImage']) material.product_image = json.dumps(data['generalImage'])
# 补上这两行:提取新上传图片的向量!
if material.product_image:
material.img_embedding = extract_and_embed(material.product_image)
# 【核心修改】:兼容前端传来的布尔值 # 【核心修改】:兼容前端传来的布尔值
if 'isEnabled' in data: if 'isEnabled' in data:
raw_enabled = data['isEnabled'] raw_enabled = data['isEnabled']

View File

@ -9,6 +9,7 @@ from sqlalchemy import or_, func, text, and_
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError
import traceback import traceback
import json import json
from app.utils.ai_vision import extract_and_embed
class BuyInboundService: class BuyInboundService:
@ -177,6 +178,9 @@ class BuyInboundService:
arrival_photo=json.dumps(data.get('arrival_photo', [])), arrival_photo=json.dumps(data.get('arrival_photo', [])),
inspection_report=json.dumps(data.get('inspection_report', [])) inspection_report=json.dumps(data.get('inspection_report', []))
) )
# 实时提取到货图片向量(失败不影响业务)
if new_stock.arrival_photo:
new_stock.arrival_image_embedding = extract_and_embed(new_stock.arrival_photo)
db.session.add(new_stock) db.session.add(new_stock)
db.session.commit() db.session.commit()
return new_stock return new_stock

View File

@ -9,6 +9,7 @@ from sqlalchemy import or_, func, text, and_
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError
import traceback import traceback
import json import json
from app.utils.ai_vision import extract_and_embed
class ProductInboundService: class ProductInboundService:
@ -183,6 +184,9 @@ class ProductInboundService:
sale_price=float(data.get('sale_price') or 0), sale_price=float(data.get('sale_price') or 0),
order_id=data.get('order_id') order_id=data.get('order_id')
) )
# 实时提取成品实拍图向量(失败不影响业务)
if new_stock.product_photo:
new_stock.arrival_image_embedding = extract_and_embed(new_stock.product_photo)
db.session.add(new_stock) db.session.add(new_stock)
db.session.commit() db.session.commit()
return new_stock return new_stock

View File

@ -9,6 +9,7 @@ from sqlalchemy import or_, func, text, and_
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError
import traceback import traceback
import json import json
from app.utils.ai_vision import extract_and_embed
class SemiInboundService: class SemiInboundService:
@ -220,6 +221,9 @@ class SemiInboundService:
detail_link=data.get('detail_link'), detail_link=data.get('detail_link'),
remark=data.get('remark') remark=data.get('remark')
) )
# 实时提取到货图片向量(失败不影响业务)
if new_stock.arrival_photo:
new_stock.arrival_image_embedding = extract_and_embed(new_stock.arrival_photo)
db.session.add(new_stock) db.session.add(new_stock)
db.session.commit() db.session.commit()
return new_stock return new_stock

View File

@ -4,6 +4,8 @@ AI Vision 模块 - CLIP Vision Encoder ONNX 推理
""" """
import os import os
import json
import time
import numpy as np import numpy as np
from PIL import Image from PIL import Image
import onnxruntime as ort import onnxruntime as ort
@ -130,3 +132,61 @@ def get_image_embedding(image_path: str) -> list:
} }
) )
return outputs[0][0].tolist() return outputs[0][0].tolist()
# ============================================================================
# 通用向量提取工具:防呆、防错
# ============================================================================
def extract_and_embed(photo_source, upload_dir='/app/uploads', max_retries=6, retry_delay=0.5):
    """Best-effort extraction of an image embedding for an uploaded photo.

    The photo reference may be a JSON-encoded list of paths, a JSON-encoded
    string, or a bare path string. Only the first entry of a list is used,
    and only the final path segment (the file name) is kept; it is then
    looked up inside ``upload_dir``.

    Because the upload may not have landed on disk yet, the file is polled
    up to ``max_retries`` times, sleeping ``retry_delay`` seconds between
    attempts (no sleep after the final attempt).

    Args:
        photo_source: JSON string, plain path string, or any object whose
            ``str()`` yields one of those. Falsy values return ``None``.
        upload_dir: Directory in which uploaded files are stored.
        max_retries: Maximum number of existence checks before giving up.
        retry_delay: Seconds to wait between two consecutive checks.

    Returns:
        The embedding returned by ``get_image_embedding`` (converted to a
        plain list when it is a NumPy array), or ``None`` when the source is
        empty, the file never appears, or any error occurs. This function
        never raises for ordinary errors, so callers can treat the
        embedding as optional.
    """
    if not photo_source:
        return None

    try:
        source_text = str(photo_source).strip()

        # Peel off the JSON wrapper when there is one. json.JSONDecodeError
        # is a ValueError subclass, so anything that is not valid JSON is
        # treated as a bare path string; unrelated exceptions are no longer
        # swallowed here.
        try:
            parsed = json.loads(source_text)
        except (TypeError, ValueError):
            raw_path = source_text
        else:
            if isinstance(parsed, list):
                raw_path = parsed[0] if parsed else ""
            elif isinstance(parsed, str):
                raw_path = parsed
            else:
                raw_path = str(parsed)

        if not raw_path:
            return None

        # Keep only the last path segment. A non-string first list element
        # raises here and is reported by the outer handler.
        pure_filename = raw_path.split('/')[-1]

        # Strip stray quotes/brackets left over from malformed JSON such as
        # `123.jpg"]` or `"123.jpg"`.
        pure_filename = pure_filename.translate(str.maketrans('', '', '"\'[]'))

        # An empty name or a dot entry would resolve to a directory rather
        # than an image; there is nothing to embed in that case.
        if pure_filename in ('', '.', '..'):
            return None

        file_path = os.path.join(upload_dir, pure_filename)

        for attempt in range(1, max_retries + 1):
            # isfile (not exists) so a directory is never handed to the encoder.
            if os.path.isfile(file_path):
                vec = get_image_embedding(file_path)
                return vec.tolist() if isinstance(vec, np.ndarray) else vec
            # Only wait when another attempt will follow; sleeping after the
            # final check would just add latency to the request.
            if attempt < max_retries:
                print(f"[AI 识图等待] 第 {attempt} 次尝试,未找到文件 {file_path},等待 {retry_delay}s...")
                time.sleep(retry_delay)

        print(f"[AI 识图警告] 彻底失败!经过等待依然未找到图片: {file_path}")
    except Exception as e:
        # Deliberate best-effort boundary: embedding failures must not break
        # the surrounding business transaction.
        print(f"[AI 识图错误] 实时提取向量失败: {str(e)}")
    return None

View File

@ -28,3 +28,5 @@ APScheduler==3.10.4
pytz pytz
# [新增] 进度条库 (脚本和任务所需) # [新增] 进度条库 (脚本和任务所需)
tqdm>=4.66.0 tqdm>=4.66.0
# [新增] pgvector 向量数据库支持(以图搜图 / 实时向量提取)
pgvector>=0.2.0