版本变更V3.35将图像的处理统一更换到新表当中

This commit is contained in:
dxc
2026-05-26 11:28:26 +08:00
parent 682139bab8
commit fb5b8d873b
9 changed files with 281 additions and 70 deletions

View File

@ -19,6 +19,14 @@ from app.models.base import MaterialBase
# 注册蓝图
image_search_bp = Blueprint('image_search', __name__)
# ============================================================================
# 可配置参数
# ============================================================================
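# Image-search similarity threshold: the cosine distance must be below this value (the smaller the distance, the more similar).
# Cosine similarity = 1 - distance, so it must be > (1 - SIMILARITY_DISTANCE_THRESHOLD).
# The current value 0.40 corresponds to cosine similarity > 0.60.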
SIMILARITY_DISTANCE_THRESHOLD = 0.40
# ============================================================================
# POST /api/v1/common/image-search
@ -87,27 +95,80 @@ def image_search():
ie.module_name,
ie.target_id,
ie.image_url,
(1 - (ie.embedding <=> :query_vector)) AS similarity
(1 - (ie.embedding <=> :query_vector)) AS similarity,
(ie.embedding <=> :query_vector) AS distance
FROM image_embeddings ie
WHERE ie.embedding IS NOT NULL
AND (ie.embedding <=> :query_vector) < :distance_threshold
ORDER BY ie.embedding <=> :query_vector
LIMIT 200
""")
raw_records = db.session.execute(sql, {"query_vector": query_vector_str}).fetchall()
raw_records = db.session.execute(sql, {
"query_vector": query_vector_str,
"distance_threshold": SIMILARITY_DISTANCE_THRESHOLD
}).fetchall()
if not raw_records:
return jsonify({"code": 200, "data": []})
return jsonify({"code": 200, "data": [], "msg": "未找到相似图片(阈值过滤后)"})
# 按 (module_name, target_id) 去重,每业务记录只保留最相似的那张图
seen = {}
# ---------------------------------------------------------
# Step 1: 初步去重(同入库单只保留最相似的图片)
# ---------------------------------------------------------
first_img_seen = {}
unique_records = []
for row in raw_records:
key = (row.module_name, row.target_id)
if key not in seen:
seen[key] = row
if key not in first_img_seen:
first_img_seen[key] = True
unique_records.append(row)
# 批量回填业务数据
# ---------------------------------------------------------
# Step 2: 按物料维度去重(相同物料只保留第一条 = 相似度最高的那条)
# ---------------------------------------------------------
target_ids_by_module = {}
for row in seen.values():
for row in unique_records:
target_ids_by_module.setdefault(row.module_name, []).append(row.target_id)
# Look up the base_id of each record (across stock_buy / stock_semi / stock_product / material_base)
base_id_map = {}
for module in ('stock_buy', 'stock_semi', 'stock_product'):
if module not in target_ids_by_module:
continue
ids = target_ids_by_module[module]
ModelCls = StockBuy if module == 'stock_buy' else (StockSemi if module == 'stock_semi' else StockProduct)
id_col = getattr(ModelCls, 'id')
base_col = getattr(ModelCls, 'base_id')
rows = (
db.session.query(id_col, base_col)
.outerjoin(MaterialBase, base_col == MaterialBase.id)
.filter(id_col.in_(ids))
.all()
)
for rec_id, base_id in rows:
base_id_map[(module, rec_id)] = base_id
if 'material_base' in target_ids_by_module:
for rec_id in target_ids_by_module['material_base']:
base_id_map[('material_base', rec_id)] = rec_id
# 按 base_id 去重:相同物料只保留第一张图
material_seen = {}
final_records = []
for row in unique_records:
base_id = base_id_map.get((row.module_name, row.target_id))
if base_id is not None and base_id in material_seen:
continue
if base_id is not None:
material_seen[base_id] = True
final_records.append(row)
# ---------------------------------------------------------
# Step 3: batch back-fill business data (based on the deduplicated final_records)
# ---------------------------------------------------------
target_ids_by_module = {}
for row in final_records:
target_ids_by_module.setdefault(row.module_name, []).append(row.target_id)
business_map = {}
@ -205,9 +266,9 @@ def image_search():
'url': '/material/index',
}
# 组装最终返回
# Assemble the final response (based on final_records, ordered from highest to lowest similarity)
results = []
for row in seen.values():
for row in final_records:
key = (row.module_name, row.target_id)
biz = business_map.get(key, {})
raw_url = row.image_url or ''