feat: 新增物料/入库单实时 CLIP 向量提取(新建+更新),修复 I/O 延迟和路径解析静默失败

This commit is contained in:
DXC
2026-05-25 10:04:32 +08:00
parent ee9b19e72a
commit 1da4b454cd
10 changed files with 100 additions and 3 deletions

View File

@ -4,6 +4,8 @@ AI Vision 模块 - CLIP Vision Encoder ONNX 推理
"""
import os
import json
import time
import numpy as np
from PIL import Image
import onnxruntime as ort
@ -129,4 +131,62 @@ def get_image_embedding(image_path: str) -> list:
'attention_mask': dummy_mask
}
)
return outputs[0][0].tolist()
return outputs[0][0].tolist()
# ============================================================================
# 通用向量提取工具:防呆、防错
# ============================================================================
def extract_and_embed(photo_source):
if not photo_source:
return None
try:
# 1. 提取基础字符串
photo_source_str = str(photo_source).strip()
raw_path = ""
# 尝试剥掉 JSON 外壳
try:
parsed = json.loads(photo_source_str)
if isinstance(parsed, list):
raw_path = parsed[0] if parsed else ""
elif isinstance(parsed, str):
raw_path = parsed
else:
raw_path = str(parsed)
except:
raw_path = photo_source_str
if not raw_path:
return None
# 2. 剥离出最纯净的文件名 (只取最后一段)
pure_filename = raw_path.split('/')[-1]
# 3. 【终极物理净化】强行抠掉所有多余的标点符号!
# 哪怕传进来的是 123.jpg"] 或者是 "123.jpg",全部洗干净
pure_filename = pure_filename.replace('"', '').replace("'", "").replace('[', '').replace(']', '')
# 4. 拼接真实的 Docker 物理路径
file_path = os.path.join('/app/uploads', pure_filename)
# 5. 加入重试机制 (最多等 3 秒)
max_retries = 6
for i in range(max_retries):
if os.path.exists(file_path):
# 文件找到了,开始提取向量
vec = get_image_embedding(file_path)
if isinstance(vec, np.ndarray):
return vec.tolist()
return vec
else:
print(f"[AI 识图等待] 第 {i+1} 次尝试,未找到文件 {file_path},等待 0.5s...")
time.sleep(0.5)
print(f"[AI 识图警告] 彻底失败!经过等待依然未找到图片: {file_path}")
except Exception as e:
print(f"[AI 识图错误] 实时提取向量失败: {str(e)}")
return None