版本变更V3.35将图像的处理统一更换到新表当中
This commit is contained in:
@ -19,6 +19,14 @@ from app.models.base import MaterialBase
|
||||
# 注册蓝图
|
||||
image_search_bp = Blueprint('image_search', __name__)
|
||||
|
||||
# ============================================================================
|
||||
# 可配置参数
|
||||
# ============================================================================
|
||||
# 以图搜图相似度阈值:余弦距离必须小于此值(距离越小越相似)
|
||||
# 即余弦相似度 = 1 - 距离,必须 > (1 - SIMILARITY_DISTANCE_THRESHOLD)
|
||||
# 当前取值 0.40 对应余弦相似度 > 0.60(若改为 0.25 则对应余弦相似度 > 0.75)
|
||||
SIMILARITY_DISTANCE_THRESHOLD = 0.40
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# POST /api/v1/common/image-search
|
||||
@ -87,27 +95,80 @@ def image_search():
|
||||
ie.module_name,
|
||||
ie.target_id,
|
||||
ie.image_url,
|
||||
(1 - (ie.embedding <=> :query_vector)) AS similarity
|
||||
(1 - (ie.embedding <=> :query_vector)) AS similarity,
|
||||
(ie.embedding <=> :query_vector) AS distance
|
||||
FROM image_embeddings ie
|
||||
WHERE ie.embedding IS NOT NULL
|
||||
AND (ie.embedding <=> :query_vector) < :distance_threshold
|
||||
ORDER BY ie.embedding <=> :query_vector
|
||||
LIMIT 200
|
||||
""")
|
||||
|
||||
raw_records = db.session.execute(sql, {"query_vector": query_vector_str}).fetchall()
|
||||
raw_records = db.session.execute(sql, {
|
||||
"query_vector": query_vector_str,
|
||||
"distance_threshold": SIMILARITY_DISTANCE_THRESHOLD
|
||||
}).fetchall()
|
||||
if not raw_records:
|
||||
return jsonify({"code": 200, "data": []})
|
||||
return jsonify({"code": 200, "data": [], "msg": "未找到相似图片(阈值过滤后)"})
|
||||
|
||||
# 按 (module_name, target_id) 去重,每业务记录只保留最相似的那张图
|
||||
seen = {}
|
||||
# ---------------------------------------------------------
|
||||
# Step 1: 初步去重(同入库单只保留最相似的图片)
|
||||
# ---------------------------------------------------------
|
||||
first_img_seen = {}
|
||||
unique_records = []
|
||||
for row in raw_records:
|
||||
key = (row.module_name, row.target_id)
|
||||
if key not in seen:
|
||||
seen[key] = row
|
||||
if key not in first_img_seen:
|
||||
first_img_seen[key] = True
|
||||
unique_records.append(row)
|
||||
|
||||
# 批量回填业务数据
|
||||
# ---------------------------------------------------------
|
||||
# Step 2: 按物料维度去重(相同物料只保留第一条 = 相似度最高的那条)
|
||||
# ---------------------------------------------------------
|
||||
target_ids_by_module = {}
|
||||
for row in seen.values():
|
||||
for row in unique_records:
|
||||
target_ids_by_module.setdefault(row.module_name, []).append(row.target_id)
|
||||
|
||||
# 查询每条记录的 base_id(跨 stock_buy/semi/product/material_base)
|
||||
base_id_map = {}
|
||||
|
||||
for module in ('stock_buy', 'stock_semi', 'stock_product'):
|
||||
if module not in target_ids_by_module:
|
||||
continue
|
||||
ids = target_ids_by_module[module]
|
||||
ModelCls = StockBuy if module == 'stock_buy' else (StockSemi if module == 'stock_semi' else StockProduct)
|
||||
id_col = getattr(ModelCls, 'id')
|
||||
base_col = getattr(ModelCls, 'base_id')
|
||||
|
||||
rows = (
|
||||
db.session.query(id_col, base_col)
|
||||
.outerjoin(MaterialBase, base_col == MaterialBase.id)
|
||||
.filter(id_col.in_(ids))
|
||||
.all()
|
||||
)
|
||||
for rec_id, base_id in rows:
|
||||
base_id_map[(module, rec_id)] = base_id
|
||||
|
||||
if 'material_base' in target_ids_by_module:
|
||||
for rec_id in target_ids_by_module['material_base']:
|
||||
base_id_map[('material_base', rec_id)] = rec_id
|
||||
|
||||
# 按 base_id 去重:相同物料只保留第一张图
|
||||
material_seen = {}
|
||||
final_records = []
|
||||
for row in unique_records:
|
||||
base_id = base_id_map.get((row.module_name, row.target_id))
|
||||
if base_id is not None and base_id in material_seen:
|
||||
continue
|
||||
if base_id is not None:
|
||||
material_seen[base_id] = True
|
||||
final_records.append(row)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Step 3: 批量回填业务数据(基于去重后的 final_records)
|
||||
# ---------------------------------------------------------
|
||||
target_ids_by_module = {}
|
||||
for row in final_records:
|
||||
target_ids_by_module.setdefault(row.module_name, []).append(row.target_id)
|
||||
|
||||
business_map = {}
|
||||
@ -205,9 +266,9 @@ def image_search():
|
||||
'url': '/material/index',
|
||||
}
|
||||
|
||||
# 组装最终返回
|
||||
# 组装最终返回(基于 final_records,按相似度从高到低)
|
||||
results = []
|
||||
for row in seen.values():
|
||||
for row in final_records:
|
||||
key = (row.module_name, row.target_id)
|
||||
biz = business_map.get(key, {})
|
||||
raw_url = row.image_url or ''
|
||||
|
||||
Reference in New Issue
Block a user