refactor: 提取 WorkspaceManager,将文件扫描与路径业务逻辑从主 GUI 解耦
This commit is contained in:
231
src/core/workspace_manager.py
Normal file
231
src/core/workspace_manager.py
Normal file
@ -0,0 +1,231 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
工作空间管理器
|
||||||
|
|
||||||
|
负责工作目录文件扫描、步骤输出路径发现、配置裁剪等业务逻辑,
|
||||||
|
与 GUI 组件解耦,不直接引用任何 UI 类。
|
||||||
|
"""
|
||||||
|
|
||||||
|
import copy
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
class WorkspaceManager:
    """Manage default step output paths, work-directory scanning and config pruning.

    The class holds two pieces of state:

    * ``step_default_outputs`` - for each step id, the conventional output
      location relative to the work directory.  A value is either a plain
      string (a trailing ``/`` marks a directory) or a dict mapping an output
      type to a relative path.
    * ``step_outputs`` - ``{step_id: {output_type: path_str}}`` records of
      outputs actually found on disk.

    It does not reference any UI class.
    """

    # Whitelist: only these suffixes count as scientific mask data.
    SCIENTIFIC_EXTENSIONS = {'.dat', '.tif', '.tiff', '.shp'}
    # Blacklist: a path containing any of these substrings is a temp file.
    TMP_KEYWORDS = ('__tmp', '_tmp')
    # Output types that must pass the scientific-format whitelist.
    MASK_TYPES = {'water_mask', 'glint_mask', 'boundary_mask'}

    def __init__(self):
        """Set up the default output table and an empty record of real outputs."""
        self.step_default_outputs = {
            'step1': "1_water_mask/water_mask_from_ndwi.dat",
            'step2': "2_Glint_Detection/severe_glint_area.dat",
            'step3': "3_deglint/deglint_goodman.bsq",
            'step4_sampling': "4_sampling/sampling_spectra.csv",
            'step5_clean': "5_Data_Cleaning/processed_data.csv",
            'step6_feature': "6_Spectral_Feature_Extraction/training_spectra.csv",
            'step7_index': "7_Water_Quality_Indices/training_spectra_indices.csv",
            'step8_ml_train': "8_Supervised_Model_Training/",
            'step9_ml_predict': "9_ML_Prediction/",
            'step10_watercolor': "10_WaterIndex_Images/",
            'step11_map': "14_visualization/",
        }
        self.step_outputs = {}

    @staticmethod
    def _is_scientific_mask(path_str):
        """Return True if the path looks like a usable scientific mask file.

        A path is rejected when its lower-cased string form contains one of
        ``TMP_KEYWORDS`` (the whole path is checked, not only the file name),
        or when its suffix is not in ``SCIENTIFIC_EXTENSIONS``.

        Args:
            path_str: A path given as ``str`` or ``Path``.
        """
        lowered = str(path_str).lower()
        if any(keyword in lowered for keyword in WorkspaceManager.TMP_KEYWORDS):
            return False
        return Path(path_str).suffix.lower() in WorkspaceManager.SCIENTIFIC_EXTENSIONS

    def find_step_output(self, work_path, step_id, output_type, ref_img_path=None):
        """Locate the output of a step for the given output type.

        Lookup order:

        1. A path recorded in ``step_outputs`` for this step and output type.
           It is returned as recorded (existence is not re-checked here); for
           mask types it is ignored unless it passes ``_is_scientific_mask``.
        2. A type-specific search on disk based on the step's default path.

        Args:
            work_path: Work directory (``Path`` or ``str``).
            step_id: Step id, a key of ``step_default_outputs``.
            output_type: Output type such as ``'water_mask'`` or
                ``'deglint_image'``.
            ref_img_path: Reference image path; only consulted when
                ``output_type == 'reference_img'``.

        Returns:
            The path as a string (``ref_img_path`` is returned unchanged), or
            ``None`` when nothing was found.
        """
        if step_id not in self.step_default_outputs:
            return None

        raw = self.step_default_outputs[step_id]
        rel_path = None
        if isinstance(raw, str):
            rel_path = raw
        elif isinstance(raw, dict):
            # Fall back to the first configured path; an empty dict yields
            # None instead of raising IndexError.
            rel_path = raw.get(output_type) or next(iter(raw.values()), None)

        if not rel_path:
            return None

        recorded = self.step_outputs.get(step_id, {})
        if output_type in recorded:
            candidate = recorded[output_type]
            # Recorded mask paths get a second whitelist check; a rejected
            # one falls through to the default-path lookup below.
            if output_type not in self.MASK_TYPES or self._is_scientific_mask(candidate):
                return candidate

        work_path = Path(work_path)

        if output_type == 'water_mask':
            mask_path = work_path / rel_path
            if mask_path.exists():
                return str(mask_path)
        elif output_type == 'reference_img':
            if ref_img_path and Path(ref_img_path).exists():
                return ref_img_path
        elif output_type == 'deglint_image':
            deglint_path = work_path / rel_path
            if deglint_path.exists():
                return str(deglint_path)
            # Otherwise accept the first deglint/interpolated product found.
            deglint_dir = work_path / "3_deglint"
            if deglint_dir.exists():
                for pattern in ("deglint_*.bsq", "interpolated_*.bsq"):
                    for file_path in deglint_dir.glob(pattern):
                        return str(file_path)
        elif rel_path.endswith('/'):
            # Trailing slash: the default output is a directory.
            output_dir = work_path / rel_path.rstrip('/')
            if output_dir.is_dir():
                return str(output_dir)
        else:
            output_path = work_path / rel_path
            if output_path.exists():
                return str(output_path)

        return None

    def scan_work_directory_for_files(self, work_path):
        """Scan the work directory and discover step outputs by file name.

        Each known sub-directory is walked recursively and files are
        classified by lower-cased name.  When several files match the same
        output type, the last one visited wins.  Every discovery is merged
        into ``self.step_outputs``.

        Args:
            work_path: Work directory (``Path`` or ``str``).

        Returns:
            dict: ``{step_id: {output_type: path_str}}``.  A step whose
            directory contains files but no match maps to an empty dict.
        """
        work_path = Path(work_path)
        discovered_outputs = {}

        subdirs = {
            '1_water_mask': 'step1',
            '2_Glint_Detection': 'step2',
            '3_deglint': 'step3',
            '5_Data_Cleaning': 'step5_clean',
            '6_Spectral_Feature_Extraction': 'step6_feature',
            '7_Water_Quality_Indices': 'step7_index',
            '8_Supervised_Model_Training': 'step8_ml_train',
            '8_Regression_Modeling': 'step8_ml_train',
            '13_Custom_Regression': 'step13',
            '9_ML_Prediction': 'step9_ml_predict',
            '11_12_13_predictions/Non_Empirical_Prediction': 'step11_map',
            '13_Custom_Regression/Custom_Regression_Prediction': 'step13',
            '14_visualization': 'step13_report',
            '10_geotiff_batch_rendering': 'step11_map',
        }

        # NOTE(review): no directory above maps to 'step4_sampling' or
        # 'step12_viz', so the branches below that test for them never fire
        # with this table - confirm the intended mapping.
        for subdir, step_ids in subdirs.items():
            subdir_path = work_path / subdir
            if not subdir_path.exists():
                continue

            if isinstance(step_ids, str):
                step_ids = [step_ids]

            for file_path in subdir_path.rglob('*'):
                if not file_path.is_file():
                    continue
                file_name = file_path.name.lower()

                for step_id in step_ids:
                    found = discovered_outputs.setdefault(step_id, {})

                    if 'water_mask' in file_name and step_id == 'step1':
                        # Masks must pass the scientific-format whitelist.
                        if self._is_scientific_mask(file_path):
                            found['water_mask'] = str(file_path)
                    elif 'glint' in file_name and 'mask' in file_name and step_id == 'step2':
                        if self._is_scientific_mask(file_path):
                            found['glint_mask'] = str(file_path)
                    elif 'deglint' in file_name and step_id == 'step3':
                        found['deglint_image'] = str(file_path)
                    elif 'processed_data' in file_name and step_id == 'step4_sampling':
                        found['processed_data'] = str(file_path)
                    elif 'training_spectra' in file_name and step_id == 'step5_clean':
                        found['training_spectra'] = str(file_path)
                    elif 'water_quality_indices' in file_name and step_id == 'step6_feature':
                        found['water_indices'] = str(file_path)
                    elif 'sampling_spectra' in file_name and step_id == 'step4_sampling':
                        found['sampling_points'] = str(file_path)
                    elif file_name.endswith('.csv') and step_id in ['step9_ml_predict', 'step11_map', 'step12_viz']:
                        found['predictions'] = str(file_path)

        # Merge the discoveries into the persistent record.
        for step_id, outputs in discovered_outputs.items():
            self.step_outputs.setdefault(step_id, {}).update(outputs)

        return discovered_outputs

    def update_step_outputs(self, step_name, work_path):
        """Refresh the recorded output paths of one step from disk.

        Only dict-shaped defaults (``{output_type: relative_path}``) can be
        recorded.  A relative path containing ``*`` is treated as a glob
        pattern and the most recently modified match is stored.

        A plain-string default carries no output type to key the record by,
        so nothing is recorded for it; ``find_step_output`` resolves such
        steps directly from the default path.  Previously this case raised
        ``AttributeError`` because ``.items()`` was called on the string.

        Args:
            step_name: Step id; unknown ids are ignored.
            work_path: Work directory (``Path`` or ``str``).
        """
        if step_name not in self.step_default_outputs:
            return

        defaults = self.step_default_outputs[step_name]
        # Make sure the record exists so callers need not pre-create it.
        recorded = self.step_outputs.setdefault(step_name, {})
        if not isinstance(defaults, dict):
            return

        work_path = Path(work_path)
        for output_type, relative_path in defaults.items():
            target = work_path / relative_path
            if '*' in relative_path:
                matching_files = list(target.parent.glob(target.name))
                if matching_files:
                    latest_file = max(matching_files, key=lambda p: p.stat().st_mtime)
                    recorded[output_type] = str(latest_file)
            elif target.exists():
                recorded[output_type] = str(target)

    @staticmethod
    def prune_config_for_prediction_mode(config: dict) -> dict:
        """Return a copy of ``config`` with the training steps disabled.

        For each training-related step id the step dict (created if absent)
        gets ``'enabled': False``.  The input is deep-copied first, so the
        caller's ``config`` is never modified.

        Args:
            config: The full configuration dictionary.

        Returns:
            The pruned deep copy of ``config``.
        """
        cfg = copy.deepcopy(config)

        training_steps = [
            "step4",
            "step5",
            "step7",
            "step6",
            "step8_non_empirical_modeling",
            "step9",
        ]
        for step_id in training_steps:
            cfg.setdefault(step_id, {})["enabled"] = False

        return cfg
|
||||||
@ -158,6 +158,7 @@ from src.gui.core.worker_thread import (
|
|||||||
from src.gui.core.preflight_dialog import PreflightDialog
|
from src.gui.core.preflight_dialog import PreflightDialog
|
||||||
from src.gui.core.pipeline_mode_dialog import PipelineModeDialog
|
from src.gui.core.pipeline_mode_dialog import PipelineModeDialog
|
||||||
from src.gui.core.viz_thread import VisualizationWorkerThread, _viz_training_spectra_csv_path
|
from src.gui.core.viz_thread import VisualizationWorkerThread, _viz_training_spectra_csv_path
|
||||||
|
from src.core.workspace_manager import WorkspaceManager
|
||||||
|
|
||||||
|
|
||||||
class WaterQualityGUI(QMainWindow):
|
class WaterQualityGUI(QMainWindow):
|
||||||
@ -183,10 +184,10 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
# 训练数据模式状态
|
# 训练数据模式状态
|
||||||
self.has_training_data = True # 默认有训练数据
|
self.has_training_data = True # 默认有训练数据
|
||||||
|
|
||||||
# 步骤输出路径记录
|
# 工作空间管理器(文件扫描、路径发现、配置裁剪)
|
||||||
self.step_outputs = {} # 记录每个步骤的输出路径
|
self.workspace_manager = WorkspaceManager()
|
||||||
|
|
||||||
# 定义步骤依赖关系和标准输出路径
|
# 定义步骤依赖关系
|
||||||
self._init_step_dependencies()
|
self._init_step_dependencies()
|
||||||
|
|
||||||
self.init_ui()
|
self.init_ui()
|
||||||
@ -198,22 +199,7 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
QTimer.singleShot(100, self.init_workspace)
|
QTimer.singleShot(100, self.init_workspace)
|
||||||
|
|
||||||
def _init_step_dependencies(self):
|
def _init_step_dependencies(self):
|
||||||
"""初始化步骤依赖关系和标准输出路径"""
|
"""初始化步骤依赖关系"""
|
||||||
# 定义每个步骤的标准输出路径模式(相对于工作目录)
|
|
||||||
self.step_default_outputs = {
|
|
||||||
'step1': "1_water_mask/water_mask_from_ndwi.dat",
|
|
||||||
'step2': "2_Glint_Detection/severe_glint_area.dat",
|
|
||||||
'step3': "3_deglint/deglint_goodman.bsq",
|
|
||||||
'step4_sampling': "4_sampling/sampling_spectra.csv",
|
|
||||||
'step5_clean': "5_Data_Cleaning/processed_data.csv",
|
|
||||||
'step6_feature': "6_Spectral_Feature_Extraction/training_spectra.csv",
|
|
||||||
'step7_index': "7_Water_Quality_Indices/training_spectra_indices.csv",
|
|
||||||
'step8_ml_train': "8_Supervised_Model_Training/",
|
|
||||||
'step9_ml_predict': "9_ML_Prediction/",
|
|
||||||
'step10_watercolor': "10_WaterIndex_Images/",
|
|
||||||
'step11_map': "14_visualization/"
|
|
||||||
}
|
|
||||||
|
|
||||||
# 依赖关系字典结构:
|
# 依赖关系字典结构:
|
||||||
# '当前步骤ID': { '依赖参数名': ('上游步骤ID', '上游输出类型/Key', '当前步骤接收该路径的组件属性名') }
|
# '当前步骤ID': { '依赖参数名': ('上游步骤ID', '上游输出类型/Key', '当前步骤接收该路径的组件属性名') }
|
||||||
self.step_dependencies = {
|
self.step_dependencies = {
|
||||||
@ -1082,6 +1068,10 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
dependencies = self.step_dependencies[step_id]
|
dependencies = self.step_dependencies[step_id]
|
||||||
filled_count = 0
|
filled_count = 0
|
||||||
|
|
||||||
|
ref_img_path = None
|
||||||
|
if hasattr(self, 'step1_panel'):
|
||||||
|
ref_img_path = self.step1_panel.img_file.get_path()
|
||||||
|
|
||||||
for input_field, (dep_step, output_type, panel_attr) in dependencies.items():
|
for input_field, (dep_step, output_type, panel_attr) in dependencies.items():
|
||||||
# 检查面板是否有对应的属性
|
# 检查面板是否有对应的属性
|
||||||
if not hasattr(panel, panel_attr):
|
if not hasattr(panel, panel_attr):
|
||||||
@ -1101,7 +1091,7 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# 查找依赖步骤的输出文件
|
# 查找依赖步骤的输出文件
|
||||||
output_path = self.find_step_output(work_path, dep_step, output_type)
|
output_path = self.workspace_manager.find_step_output(work_path, dep_step, output_type, ref_img_path=ref_img_path)
|
||||||
|
|
||||||
if output_path and Path(output_path).exists():
|
if output_path and Path(output_path).exists():
|
||||||
# ★ 兼容 FileSelectWidget 与原生 QLineEdit
|
# ★ 兼容 FileSelectWidget 与原生 QLineEdit
|
||||||
@ -1132,173 +1122,6 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
}
|
}
|
||||||
return panel_map.get(step_id)
|
return panel_map.get(step_id)
|
||||||
|
|
||||||
def find_step_output(self, work_path, step_id, output_type):
|
|
||||||
"""查找指定步骤的输出文件"""
|
|
||||||
if step_id not in self.step_default_outputs:
|
|
||||||
return None
|
|
||||||
|
|
||||||
raw = self.step_default_outputs[step_id]
|
|
||||||
|
|
||||||
# ★ 兼容扁平化后的纯字符串路径格式
|
|
||||||
rel_path = None
|
|
||||||
if isinstance(raw, str):
|
|
||||||
rel_path = raw
|
|
||||||
elif isinstance(raw, dict):
|
|
||||||
rel_path = raw.get(output_type) or list(raw.values())[0]
|
|
||||||
|
|
||||||
if not rel_path:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# ★ 掩膜类型列表:这些类型只接受科学数据格式
|
|
||||||
mask_types = {'water_mask', 'glint_mask', 'boundary_mask'}
|
|
||||||
# ★ 白名单机制:只允许 .dat .tif .tiff .shp,拒绝其他一切格式
|
|
||||||
scientific_extensions = {'.dat', '.tif', '.tiff', '.shp'}
|
|
||||||
# ★ 临时文件关键词黑名单
|
|
||||||
tmp_keywords = ('__tmp', '_tmp')
|
|
||||||
|
|
||||||
def _is_scientific_mask(path_str):
|
|
||||||
"""白名单判断:只有 .dat .tif .tiff .shp 才算科学数据格式"""
|
|
||||||
p = Path(path_str)
|
|
||||||
name_lower = str(path_str).lower()
|
|
||||||
# 拒绝临时文件
|
|
||||||
if any(kw in name_lower for kw in tmp_keywords):
|
|
||||||
return False
|
|
||||||
# 白名单校验
|
|
||||||
return p.suffix.lower() in scientific_extensions
|
|
||||||
|
|
||||||
# 特殊处理:从step_outputs记录中查找实际输出路径
|
|
||||||
if step_id in self.step_outputs:
|
|
||||||
actual_outputs = self.step_outputs[step_id]
|
|
||||||
if output_type in actual_outputs:
|
|
||||||
candidate = actual_outputs[output_type]
|
|
||||||
# ★ 掩膜类型白名单二次校验:不在白名单内的一律拒绝
|
|
||||||
if output_type in mask_types and not _is_scientific_mask(candidate):
|
|
||||||
# 非科学格式被拒绝,不使用 step_outputs 中的值
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
return candidate
|
|
||||||
|
|
||||||
# 根据输出类型查找对应的文件
|
|
||||||
if output_type == 'water_mask':
|
|
||||||
# 水域掩膜:直接用统一路径
|
|
||||||
if rel_path:
|
|
||||||
mask_path = work_path / rel_path
|
|
||||||
if mask_path.exists():
|
|
||||||
return str(mask_path)
|
|
||||||
elif output_type == 'reference_img':
|
|
||||||
# 参考影像:从step1的配置中获取用户输入的影像路径
|
|
||||||
if hasattr(self, 'step1_panel'):
|
|
||||||
img_path = self.step1_panel.img_file.get_path()
|
|
||||||
if img_path and Path(img_path).exists():
|
|
||||||
return img_path
|
|
||||||
elif output_type == 'deglint_image':
|
|
||||||
# 去耀斑影像:直接用统一路径
|
|
||||||
if rel_path:
|
|
||||||
deglint_path = work_path / rel_path
|
|
||||||
if deglint_path.exists():
|
|
||||||
return str(deglint_path)
|
|
||||||
# 还要检查 Kutser 算法输出与插值方法生成的文件
|
|
||||||
deglint_dir = work_path / "3_deglint"
|
|
||||||
if deglint_dir.exists():
|
|
||||||
for file_path in deglint_dir.glob("deglint_*.bsq"):
|
|
||||||
return str(file_path)
|
|
||||||
for file_path in deglint_dir.glob("interpolated_*.bsq"):
|
|
||||||
return str(file_path)
|
|
||||||
elif rel_path:
|
|
||||||
# 直接匹配的输出类型(统一使用 rel_path)
|
|
||||||
if rel_path.endswith('/'):
|
|
||||||
# 是目录
|
|
||||||
output_path = work_path / rel_path.rstrip('/')
|
|
||||||
if output_path.exists() and output_path.is_dir():
|
|
||||||
return str(output_path)
|
|
||||||
else:
|
|
||||||
# 是文件
|
|
||||||
output_path = work_path / rel_path
|
|
||||||
if output_path.exists():
|
|
||||||
return str(output_path)
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def scan_work_directory_for_files(self, work_path):
|
|
||||||
"""扫描工作目录,自动发现各步骤的输出文件"""
|
|
||||||
discovered_outputs = {}
|
|
||||||
|
|
||||||
# 扫描各个子目录
|
|
||||||
subdirs = {
|
|
||||||
'1_water_mask': 'step1',
|
|
||||||
'2_Glint_Detection': 'step2',
|
|
||||||
'3_deglint': 'step3',
|
|
||||||
'5_Data_Cleaning': 'step5_clean',
|
|
||||||
'6_Spectral_Feature_Extraction': 'step6_feature',
|
|
||||||
'7_Water_Quality_Indices': 'step7_index',
|
|
||||||
'8_Supervised_Model_Training': 'step8_ml_train',
|
|
||||||
'8_Regression_Modeling': 'step8_ml_train',
|
|
||||||
'13_Custom_Regression': 'step13',
|
|
||||||
'9_ML_Prediction': 'step9_ml_predict',
|
|
||||||
'11_12_13_predictions/Non_Empirical_Prediction': 'step11_map',
|
|
||||||
'13_Custom_Regression/Custom_Regression_Prediction': 'step13',
|
|
||||||
'14_visualization': 'step13_report',
|
|
||||||
'10_geotiff_batch_rendering': 'step11_map'
|
|
||||||
}
|
|
||||||
|
|
||||||
for subdir, step_ids in subdirs.items():
|
|
||||||
subdir_path = work_path / subdir
|
|
||||||
if not subdir_path.exists():
|
|
||||||
continue
|
|
||||||
|
|
||||||
if isinstance(step_ids, str):
|
|
||||||
step_ids = [step_ids]
|
|
||||||
|
|
||||||
# 扫描该目录下的文件
|
|
||||||
for file_path in subdir_path.rglob('*'):
|
|
||||||
if file_path.is_file():
|
|
||||||
file_name = file_path.name.lower()
|
|
||||||
|
|
||||||
# 根据文件名模式判断输出类型
|
|
||||||
for step_id in step_ids:
|
|
||||||
if step_id not in discovered_outputs:
|
|
||||||
discovered_outputs[step_id] = {}
|
|
||||||
|
|
||||||
# ★ 掩膜文件白名单过滤:只有 .dat .tif .tiff .shp 才通过,拒绝 .hdr .xml .png 等
|
|
||||||
scientific_extensions = {'.dat', '.tif', '.tiff', '.shp'}
|
|
||||||
tmp_keywords = ('__tmp', '_tmp')
|
|
||||||
|
|
||||||
def _is_scientific_mask(path_str):
|
|
||||||
"""白名单判断:拒绝 .hdr .xml 临时文件等,只接受科学数据格式"""
|
|
||||||
p = Path(path_str)
|
|
||||||
name_lower = str(path_str).lower()
|
|
||||||
if any(kw in name_lower for kw in tmp_keywords):
|
|
||||||
return False
|
|
||||||
return p.suffix.lower() in scientific_extensions
|
|
||||||
|
|
||||||
# 匹配不同的文件类型
|
|
||||||
if 'water_mask' in file_name and step_id == 'step1':
|
|
||||||
if _is_scientific_mask(file_path):
|
|
||||||
discovered_outputs[step_id]['water_mask'] = str(file_path)
|
|
||||||
elif 'glint' in file_name and 'mask' in file_name and step_id == 'step2':
|
|
||||||
if _is_scientific_mask(file_path):
|
|
||||||
discovered_outputs[step_id]['glint_mask'] = str(file_path)
|
|
||||||
elif 'deglint' in file_name and step_id == 'step3':
|
|
||||||
discovered_outputs[step_id]['deglint_image'] = str(file_path)
|
|
||||||
elif 'processed_data' in file_name and step_id == 'step4_sampling':
|
|
||||||
discovered_outputs[step_id]['processed_data'] = str(file_path)
|
|
||||||
elif 'training_spectra' in file_name and step_id == 'step5_clean':
|
|
||||||
discovered_outputs[step_id]['training_spectra'] = str(file_path)
|
|
||||||
elif 'water_quality_indices' in file_name and step_id == 'step6_feature':
|
|
||||||
discovered_outputs[step_id]['water_indices'] = str(file_path)
|
|
||||||
elif 'sampling_spectra' in file_name and step_id == 'step4_sampling':
|
|
||||||
discovered_outputs[step_id]['sampling_points'] = str(file_path)
|
|
||||||
elif file_name.endswith('.csv') and step_id in ['step9_ml_predict', 'step11_map', 'step12_viz']:
|
|
||||||
discovered_outputs[step_id]['predictions'] = str(file_path)
|
|
||||||
|
|
||||||
# 更新内部记录
|
|
||||||
for step_id, outputs in discovered_outputs.items():
|
|
||||||
if step_id not in self.step_outputs:
|
|
||||||
self.step_outputs[step_id] = {}
|
|
||||||
self.step_outputs[step_id].update(outputs)
|
|
||||||
|
|
||||||
return discovered_outputs
|
|
||||||
|
|
||||||
def auto_populate_all_steps(self):
|
def auto_populate_all_steps(self):
|
||||||
"""自动填充所有步骤的输入路径"""
|
"""自动填充所有步骤的输入路径"""
|
||||||
work_dir = getattr(self, 'work_dir', './work_dir')
|
work_dir = getattr(self, 'work_dir', './work_dir')
|
||||||
@ -1309,7 +1132,7 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
return
|
return
|
||||||
|
|
||||||
# 首先扫描工作目录发现已有的输出文件
|
# 首先扫描工作目录发现已有的输出文件
|
||||||
self.scan_work_directory_for_files(work_path)
|
self.workspace_manager.scan_work_directory_for_files(work_path)
|
||||||
|
|
||||||
step_order = ['step2', 'step3', 'step4_sampling', 'step5_clean', 'step6_feature', 'step7_index',
|
step_order = ['step2', 'step3', 'step4_sampling', 'step5_clean', 'step6_feature', 'step7_index',
|
||||||
'step8_ml_train', 'step9_ml_predict', 'step11_map', 'step12_viz', 'step13_report']
|
'step8_ml_train', 'step9_ml_predict', 'step11_map', 'step12_viz', 'step13_report']
|
||||||
@ -1612,41 +1435,6 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# ★ 全流程模式动态裁剪
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _prune_config_for_prediction_mode(self, config: dict) -> dict:
|
|
||||||
"""Prediction-only 模式:禁用训练相关步骤,保留预测和成图步骤。
|
|
||||||
|
|
||||||
被禁用的 step dict 中统一写入 'enabled': False,
|
|
||||||
这些配置最终传给 PipelineRunner,Runner 会跳过它们。
|
|
||||||
同时,被跳过的步骤的 required_input_files 在 build_missing_items
|
|
||||||
中不会被检查,从而自然规避了"CSV 缺失"等训练模式下的误报。
|
|
||||||
|
|
||||||
Args:
|
|
||||||
config: 完整配置字典(来自 get_current_config)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
裁剪后的 config(深拷贝,原 config 不被修改)
|
|
||||||
"""
|
|
||||||
cfg = copy.deepcopy(config)
|
|
||||||
|
|
||||||
# 在每个训练相关步骤的 dict 中写入 enabled=False
|
|
||||||
training_steps = [
|
|
||||||
"step4", # CSV 实测数据清洗
|
|
||||||
"step5", # 实测点光谱提取(→ training_csv_path)
|
|
||||||
"step7", # ML 监督建模
|
|
||||||
"step6", # 水质指数计算(辅助训练)
|
|
||||||
"step8_non_empirical_modeling", # 非经验回归建模
|
|
||||||
"step9", # 自定义回归建模
|
|
||||||
]
|
|
||||||
for step_id in training_steps:
|
|
||||||
step_cfg = cfg.setdefault(step_id, {})
|
|
||||||
step_cfg["enabled"] = False
|
|
||||||
|
|
||||||
return cfg
|
|
||||||
|
|
||||||
def run_full_pipeline(self):
|
def run_full_pipeline(self):
|
||||||
"""运行完整流程"""
|
"""运行完整流程"""
|
||||||
if not PIPELINE_AVAILABLE:
|
if not PIPELINE_AVAILABLE:
|
||||||
@ -1665,7 +1453,7 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
# ── 1) 运行前智能预检与自动回填(硬盘已有产物自动跳过) ──
|
# ── 1) 运行前智能预检与自动回填(硬盘已有产物自动跳过) ──
|
||||||
work_path = Path(work_dir)
|
work_path = Path(work_dir)
|
||||||
self.log_message("正在进行运行前环境预检与自动扫描...", "info")
|
self.log_message("正在进行运行前环境预检与自动扫描...", "info")
|
||||||
self.scan_work_directory_for_files(work_path)
|
self.workspace_manager.scan_work_directory_for_files(work_path)
|
||||||
self.auto_populate_all_steps()
|
self.auto_populate_all_steps()
|
||||||
self.log_message("✓ 预检完成:已扫描工作目录并自动回填已落盘的产物", "info")
|
self.log_message("✓ 预检完成:已扫描工作目录并自动回填已落盘的产物", "info")
|
||||||
|
|
||||||
@ -1685,7 +1473,7 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
|
|
||||||
# ── 2.1) ★ 根据模式动态裁剪配置 ──
|
# ── 2.1) ★ 根据模式动态裁剪配置 ──
|
||||||
if selected_mode == "prediction_only":
|
if selected_mode == "prediction_only":
|
||||||
config = self._prune_config_for_prediction_mode(config)
|
config = self.workspace_manager.prune_config_for_prediction_mode(config)
|
||||||
self.log_message("[模式选择] 已裁剪训练相关步骤(step4/5/7/8),进入仅预测模式", "info")
|
self.log_message("[模式选择] 已裁剪训练相关步骤(step4/5/7/8),进入仅预测模式", "info")
|
||||||
|
|
||||||
# ── 3) ★ 一次性全预检 + 用户交互式决策 ──
|
# ── 3) ★ 一次性全预检 + 用户交互式决策 ──
|
||||||
@ -1798,38 +1586,21 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
work_path = Path(work_dir)
|
work_path = Path(work_dir)
|
||||||
|
|
||||||
# 根据步骤名称和约定路径,记录实际输出
|
# 根据步骤名称和约定路径,记录实际输出
|
||||||
if step_name not in self.step_outputs:
|
if step_name not in self.workspace_manager.step_outputs:
|
||||||
self.step_outputs[step_name] = {}
|
self.workspace_manager.step_outputs[step_name] = {}
|
||||||
|
|
||||||
# 扫描工作目录,更新该步骤的输出路径
|
# 扫描工作目录,更新该步骤的输出路径
|
||||||
self.update_step_outputs(step_name, work_path)
|
self.workspace_manager.update_step_outputs(step_name, work_path)
|
||||||
|
|
||||||
# 自动填充依赖该步骤输出的后续步骤
|
# 自动填充依赖该步骤输出的后续步骤
|
||||||
self.auto_populate_dependent_steps(step_name)
|
self.auto_populate_dependent_steps(step_name)
|
||||||
|
|
||||||
def update_step_outputs(self, step_name, work_path):
|
|
||||||
"""更新指定步骤的输出路径记录"""
|
|
||||||
if step_name not in self.step_default_outputs:
|
|
||||||
return
|
|
||||||
|
|
||||||
step_outputs = self.step_default_outputs[step_name]
|
|
||||||
|
|
||||||
for output_type, relative_path in step_outputs.items():
|
|
||||||
if '*' in relative_path:
|
|
||||||
# 处理通配符路径
|
|
||||||
pattern_path = work_path / relative_path.replace('*', '*')
|
|
||||||
matching_files = list(pattern_path.parent.glob(pattern_path.name))
|
|
||||||
if matching_files:
|
|
||||||
# 选择最新的文件
|
|
||||||
latest_file = max(matching_files, key=lambda p: p.stat().st_mtime)
|
|
||||||
self.step_outputs[step_name][output_type] = str(latest_file)
|
|
||||||
else:
|
|
||||||
output_path = work_path / relative_path
|
|
||||||
if output_path.exists():
|
|
||||||
self.step_outputs[step_name][output_type] = str(output_path)
|
|
||||||
|
|
||||||
def auto_populate_dependent_steps(self, completed_step):
|
def auto_populate_dependent_steps(self, completed_step):
|
||||||
"""自动填充依赖于已完成步骤的后续步骤"""
|
"""自动填充依赖于已完成步骤的后续步骤"""
|
||||||
|
ref_img_path = None
|
||||||
|
if hasattr(self, 'step1_panel'):
|
||||||
|
ref_img_path = self.step1_panel.img_file.get_path()
|
||||||
|
|
||||||
for step_id, dependencies in self.step_dependencies.items():
|
for step_id, dependencies in self.step_dependencies.items():
|
||||||
for input_field, (dep_step, output_type, panel_attr) in dependencies.items():
|
for input_field, (dep_step, output_type, panel_attr) in dependencies.items():
|
||||||
if dep_step == completed_step:
|
if dep_step == completed_step:
|
||||||
@ -1841,7 +1612,7 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
if not file_widget.get_path().strip():
|
if not file_widget.get_path().strip():
|
||||||
work_dir = getattr(self, 'work_dir', './work_dir')
|
work_dir = getattr(self, 'work_dir', './work_dir')
|
||||||
work_path = Path(work_dir)
|
work_path = Path(work_dir)
|
||||||
output_path = self.find_step_output(work_path, dep_step, output_type)
|
output_path = self.workspace_manager.find_step_output(work_path, dep_step, output_type, ref_img_path=ref_img_path)
|
||||||
if output_path and Path(output_path).exists():
|
if output_path and Path(output_path).exists():
|
||||||
file_widget.set_path(output_path)
|
file_widget.set_path(output_path)
|
||||||
self.log_message(f"步骤完成后自动填充 {step_id}.{input_field}: {output_path}", "info")
|
self.log_message(f"步骤完成后自动填充 {step_id}.{input_field}: {output_path}", "info")
|
||||||
|
|||||||
Reference in New Issue
Block a user