feat(step8): 外部模型从单文件升级为母文件夹多模型字典扫描

This commit is contained in:
DXC
2026-06-08 09:56:02 +08:00
parent 4efe5b871e
commit 2b76d7908f
12 changed files with 935 additions and 29 deletions

View File

@ -0,0 +1,294 @@
---
name: WQ_GUI PyQt5 面板外部模型导入模式
description: 在 Step8 等预测面板中通过 QRadioButton + FileSelectWidget + joblib.load 防御性加载实现"内置/导入"双模式切换的标准模式
source: auto-skill
extracted_at: '2026-06-08T01:38:14.481Z'
---
# WQ_GUI PyQt5 面板外部模型导入模式
## 适用场景
Step8(机器学习预测)、Step8_5、Step8_75 等面板需要同时支持:
1. **内置模式**:使用 `step6` 训练流程生成的模型目录
2. **导入模式**:用户手动选择本地预训练 `.joblib` 文件直接加载
---
## 1. 模板(可直接复制到 `__init__` + `init_ui`)
```python
from PyQt5.QtWidgets import QRadioButton
class StepXPanel(QWidget):
    """Prediction-panel template with a built-in / imported model switch.

    Two radio buttons select where the model comes from.  Choosing the
    external option reveals a file picker whose browse button is rewired to
    ``_browse_external_model``; the externally loaded model is cached on the
    instance in ``current_model``.
    """

    def __init__(self, parent=None):
        """Create the panel with no external model cached and build the UI.

        Args:
            parent: Optional parent widget, forwarded to the base class.
        """
        super().__init__(parent)
        # Cache for the externally loaded model instance (None = nothing loaded).
        self.current_model = None
        self.init_ui()

    def init_ui(self):
        """Build the model-source selector and the initially hidden file picker."""
        layout = QVBoxLayout()

        # -------- Model source selection (radio button group) --------
        source_group = QGroupBox("模型来源")
        source_layout = QVBoxLayout()
        self.use_trained_model = QRadioButton("使用当前训练流程的模型")
        self.use_external_model = QRadioButton("导入本地预训练模型 (.joblib)")
        # The default is selected before the toggled signals are connected,
        # so the slot is not invoked while the panel is still being built.
        self.use_trained_model.setChecked(True)
        source_layout.addWidget(self.use_trained_model)
        source_layout.addWidget(self.use_external_model)
        self.use_trained_model.toggled.connect(self._on_model_source_changed)
        self.use_external_model.toggled.connect(self._on_model_source_changed)
        source_group.setLayout(source_layout)
        layout.addWidget(source_group)

        # -------- External model file picker (shown conditionally) --------
        self.external_model_widget = FileSelectWidget(
            "预训练模型:",
            "Joblib Files (*.joblib);;All Files (*.*)"
        )
        # FileSelectWidget's browse_btn is wired to a default open-file
        # behaviour; drop that connection first, then attach the custom slot.
        self.external_model_widget.browse_btn.clicked.disconnect()
        self.external_model_widget.browse_btn.clicked.connect(self._browse_external_model)
        # Hidden until the "import external model" option is selected.
        self.external_model_widget.setVisible(False)
        layout.addWidget(self.external_model_widget)
        # ... remaining existing UI ...
```
---
## 2. 槽函数模板
### `_on_model_source_changed`
单选按钮 `toggled` 信号在**两个**按钮上都会触发(点击 A 时,A 以 `checked=True` 触发,原先选中的 B 以 `checked=False` 触发),所以用 `if not checked: return` 让非选中分支短路。
```python
def _on_model_source_changed(self, checked: bool):
    """Show or hide the external-model file picker when the source changes.

    Both radio buttons are connected to this slot, so it is called once per
    button whose state changes.  Only calls with ``checked=True`` do any
    work; calls with ``checked=False`` return immediately.

    Args:
        checked: New checked state of the radio button that emitted
            ``toggled``.
    """
    if not checked:
        return
    is_external = self.use_external_model.isChecked()
    self.external_model_widget.setVisible(is_external)
    # Switching back to the built-in model drops the cached external model,
    # which releases it and prevents a stale model from being used later.
    if not is_external:
        self.current_model = None
```
### `_browse_external_model`
- 使用 `QFileDialog.getOpenFileName` 而非 `getExistingDirectory`
- 防御性解析两种格式:`{"model": pipeline, ...}`(Step6 输出格式)和裸 `Pipeline` 对象
- 失败用 `QMessageBox.warning` 友善提示;成功用 `QMessageBox.information` 告知
```python
from PyQt5.QtWidgets import QFileDialog, QMessageBox
from pathlib import Path
def _browse_external_model(self):
    """Let the user pick a ``.joblib`` file and cache the model it contains.

    Two layouts are accepted: a dict carrying the model under the ``"model"``
    key (described in this document as the Step6 output format) and a bare
    model object.  In both cases the object that gets cached in
    ``self.current_model`` must expose ``predict``; anything else is reported
    as a format error and the previously cached model is left untouched.

    Any exception raised while importing joblib, loading the file or updating
    the path widget clears ``self.current_model`` and is shown in a warning
    dialog instead of propagating.
    """
    default = self._get_default_work_dir()
    path, _ = QFileDialog.getOpenFileName(
        self,
        "选择预训练模型 (.joblib)",
        default,
        "Joblib Files (*.joblib);;All Files (*.*)",
    )
    if not path:
        # Dialog cancelled: keep whatever is currently loaded.
        return
    try:
        import joblib

        # SECURITY NOTE: joblib files are pickle-based, so loading one can
        # execute arbitrary code.  Only open files from a trusted source.
        loaded = joblib.load(path)
        # Unwrap the dict layout; otherwise treat the payload as the model.
        if isinstance(loaded, dict) and "model" in loaded:
            candidate = loaded["model"]
        else:
            candidate = loaded
        # Validate before caching, so a dict whose "model" entry is None or
        # not a predictor is rejected instead of being reported as loaded.
        if not hasattr(candidate, "predict"):
            QMessageBox.warning(
                self,
                "模型格式错误",
                f"无法识别的模型格式,文件内容类型为:{type(loaded).__name__}",
            )
            return
        self.current_model = candidate
        self.external_model_widget.set_path(path)
        QMessageBox.information(
            self,
            "模型加载成功",
            f"已加载模型:{Path(path).name}\n类型:{type(self.current_model).__name__}",
        )
    except Exception as e:
        # UI boundary: surface every failure to the user and make sure no
        # half-loaded model stays cached.
        self.current_model = None
        QMessageBox.warning(
            self,
            "模型加载失败",
            f"加载模型时发生错误:\n{type(e).__name__}: {e}",
        )
```
---
## 3. `run_step` 改造模板
在原有目录加载逻辑之前,插入外部模型优先分支:
```python
def run_step(self):
    """Run step X on its own.

    When the "import external model" radio button is checked, the cached
    external model is handed to the top-level window's ``run_single_step``
    and the directory-based flow is skipped.  Otherwise the original
    model-directory flow runs.
    """
    # ... shared input validation ...

    # External-model branch takes priority over the model directory.
    if self.use_external_model.isChecked():
        if self.current_model is None:
            QMessageBox.warning(
                self,
                "模型未加载",
                "请先点击「浏览...」按钮加载预训练模型文件!",
            )
            return
        external_model_path = self.external_model_widget.get_path() or ""
        main_window = self.window()
        # Nothing is dispatched when the top-level window does not provide
        # run_single_step.
        if hasattr(main_window, 'run_single_step'):
            config = {
                'stepX': self.get_config(),
                '_external_model': self.current_model,  # the object itself is passed
                '_external_model_path': external_model_path,  # for logging / traceability
            }
            main_window.run_single_step('stepX', config)
        # The external branch never falls through to the directory flow.
        return

    # Default flow: use the model directory (original logic unchanged).
    models_dir = self.models_dir_file.get_path()
    if not models_dir:
        QMessageBox.warning(self, "输入错误", "请选择模型目录!")
        return
    # ... rest of the original run_step ...
```
---
## 4. 后端三层完整接入(2026-06-08 已落地)
完整数据流分为三层,每层各一处分流点:
```
GUI step8_panel
↓ config = {'_external_model': obj, '_external_model_path': path, 'step8': {...}}
worker_thread.run_single_step() [第1处分流:透传顶层 key]
↓ step_config = config['step8'] + {'_external_model': obj, '_external_model_path': path}
prediction_step.predict_water_quality() [第2处分流:接收 + 透传]
↓ _external_model=obj, _external_model_path=path
WaterQualityInference(artifacts_dir, external_model=obj, external_model_path=path)
inference_batch.batch_inference_multi_models() [第3处分流:effective_model 短路]
↓ external_model=obj
inference_batch.inference_pipeline()
→ self.external_model is not None → self.loaded_model_data = self.external_model (跳过磁盘加载)
```
### 4a. worker_thread.py — run_single_step 透传
`step_config = dict(config.get(step_name, {}))` 之后、"skip_dependency_check" 之前插入:
```python
# 透传面板顶层传入的外部预训练模型GUI step8_panel 通过 config['_external_model'] 传入)
# 非空才覆盖(遵循 feedback_never_overwrite_with_empty 原则)
for key in ('_external_model', '_external_model_path'):
val = config.get(key)
if val is not None and val != "":
step_config[key] = val
```
### 4b. prediction_step.py — predict_water_quality 签名 + 透传
形参表末尾增加两个参数:
```python
_external_model=None,
_external_model_path=None,
```
构造处透传:
```python
inferencer = WaterQualityInference(
models_dir,
external_model=_external_model,
external_model_path=_external_model_path,
)
all_results = inferencer.batch_inference_multi_models(
models_root_dir=models_dir,
...
external_model=_external_model,
external_model_path=_external_model_path,
)
```
### 4c. inference_batch.py — 三处修改
**`__init__` 存储**
```python
def __init__(self, artifacts_dir: str = "models/artifacts",
external_model=None, external_model_path=None):
...
self.external_model = external_model
self.external_model_path = external_model_path
```
**`batch_inference_multi_models` 短路 + 注入**
```python
# 优先级:外部预训练模型 > 从磁盘加载
if external_model is not None:
effective_model = external_model
print(f"\n使用外部预训练模型: type={type(external_model).__name__}")
else:
effective_model = None
# 子目录循环中注入:
if effective_model is not None:
model_inferencer = WaterQualityInference(
str(subdir),
external_model=effective_model,
external_model_path=external_model_path,
)
else:
model_inferencer = WaterQualityInference(str(subdir))
```
**`inference_pipeline` 模型加载短路**(`load_best_model` 调用前):
```python
if self.external_model is not None:
self.loaded_model_data = self.external_model
print(f" 使用外部预训练模型: type={type(self.external_model).__name__}")
elif model_file_path:
self.load_specific_model(model_file_path)
else:
self.load_best_model(metric=metric)
```
**关键约束**
- `joblib.load` 在 panel 槽函数里完成(GUI 进程内),对象通过 config 引用直接透传;**不跨进程**,所以不需要担心 pickle 序列化问题
- `batch_inference_multi_models` 形参 `external_model`、`external_model_path` **与实例属性同名**(`self.external_model`、`self.external_model_path`),两者都传是为了让每个子目录创建的 `WaterQualityInference` 实例都能独立持有引用
- 原有从 `models_dir` 目录加载的逻辑完全保留,只在 `external_model is not None` 时短路
---
## 5. 已知约束
- `FileSelectWidget.browse_btn.clicked` 在 `init_ui` 里会重复 connect,每次 `init_ui` 被调用时会累积;解决方案是在 connect 前先 `.disconnect()`(如模板所示)。注意:信号当前没有任何连接时,无参 `.disconnect()` 会抛出异常。
- `QRadioButton.toggled` 信号在两个按钮上都会触发,**必须**用 `if not checked: return` 短路,否则会导致切换时状态错乱。
- `self.current_model` 会在面板切回"使用当前模型"时清空,避免之后误用之前导入的旧模型;再次切到导入模式后,需要重新点击「浏览...」加载模型,否则 `run_step` 会提示"模型未加载"。
- 当前项目 venv 路径:`D:\111\office\ZHLduijie\1.WQ\WQ_GUI\venv`,导入 `joblib` 时注意 venv 环境一致性。