From 2261b4b30e27a22495b531f76bf6fceffaa080d7 Mon Sep 17 00:00:00 2001 From: DXC Date: Thu, 18 Jun 2026 09:19:51 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20Step1~Step14=20=E9=9D=A2=E6=9D=BF?= =?UTF-8?q?=E5=8D=95=E6=AD=A5=E6=8C=89=E9=92=AE=20EventBus=20=E8=A7=A3?= =?UTF-8?q?=E8=80=A6=20+=20Handler=20=E8=A1=A5=E5=85=A8=EF=BC=88Step8~Step?= =?UTF-8?q?14=EF=BC=89+=20=E6=97=A7=E4=B8=8A=E5=B8=9D=E7=B1=BB=E5=88=A0?= =?UTF-8?q?=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 9 个面板(step1~step6/step8_ml_train/step8_qaa/step9_ml_predict/step10)单步执行按钮从 parent 链上溯改为 global_event_bus.publish('RequestRunSingleStep') - PipelineExecutor 新增 _on_request_run_single_step 订阅 - 新增 Handler: step8_ml_train / step9_ml_predict / step10_qaa_inversion / step11_concentration / step12_kriging / step13_visualization / step14_report - 删除旧 water_quality_inversion_pipeline_GUI.py(上帝类已肢解完毕) --- src/core/handlers/__init__.py | 14 + src/core/handlers/base.py | 5 + src/core/handlers/register_handlers.py | 14 + src/core/handlers/step10_qaa_inversion.py | 137 + src/core/handlers/step11_concentration.py | 71 + src/core/handlers/step12_kriging.py | 81 + src/core/handlers/step13_visualization.py | 349 +++ src/core/handlers/step14_report.py | 142 + src/core/handlers/step8_ml_train.py | 58 + src/core/handlers/step9_ml_predict.py | 64 + .../water_quality_inversion_pipeline_GUI.py | 2597 ----------------- src/gui/core/pipeline_executor.py | 170 +- src/gui/core/viz_thread.py | 14 +- src/gui/core/worker_thread.py | 71 +- src/gui/panels/step10_watercolor_panel.py | 49 +- src/gui/panels/step11_map_panel.py | 15 +- src/gui/panels/step12_viz_panel.py | 21 +- src/gui/panels/step14_panel.py | 15 +- src/gui/panels/step1_panel.py | 36 +- src/gui/panels/step2_panel.py | 19 +- src/gui/panels/step3_panel.py | 30 +- src/gui/panels/step4_sampling_panel.py | 19 +- src/gui/panels/step5_clean_panel.py | 19 +- src/gui/panels/step6_feature_panel.py | 31 +- src/gui/panels/step8_ml_train_panel.py | 19 +- src/gui/panels/step8_qaa_panel.py | 19 +- src/gui/panels/step9_ml_predict_panel.py | 53 +- src/gui/water_quality_gui.py | 4 +- 28 files changed, 1446 insertions(+), 2690 deletions(-) create mode 100644 src/core/handlers/step10_qaa_inversion.py create mode 100644 src/core/handlers/step11_concentration.py create mode 100644 src/core/handlers/step12_kriging.py create mode 100644 src/core/handlers/step13_visualization.py create mode 100644 src/core/handlers/step14_report.py create mode 100644 src/core/handlers/step8_ml_train.py create mode 100644 src/core/handlers/step9_ml_predict.py delete mode 100644 src/core/water_quality_inversion_pipeline_GUI.py diff --git a/src/core/handlers/__init__.py b/src/core/handlers/__init__.py index fe2a70d..91bfd57 100644 --- a/src/core/handlers/__init__.py +++ b/src/core/handlers/__init__.py @@ -18,6 +18,13 @@ from src.core.handlers.step4_sampling import Step4SamplingHandler from src.core.handlers.step5_process_csv import Step5ProcessCsvHandler from src.core.handlers.step6_extract_spectra import Step6ExtractSpectraHandler from src.core.handlers.step7_calc_indices import Step7CalcIndicesHandler +from src.core.handlers.step8_ml_train import Step8MlTrainHandler +from src.core.handlers.step9_ml_predict import Step9MlPredictHandler +from src.core.handlers.step10_qaa_inversion import Step10QaaInversionHandler +from src.core.handlers.step11_concentration import Step11ConcentrationHandler +from src.core.handlers.step12_kriging import Step12KrigingHandler +from src.core.handlers.step13_visualization import Step13VisualizationHandler +from src.core.handlers.step14_report import Step14ReportHandler __all__ = [ 'BaseStepHandler', @@ -29,4 +36,11 @@ __all__ = [ 'Step5ProcessCsvHandler', 'Step6ExtractSpectraHandler', 'Step7CalcIndicesHandler', + 'Step8MlTrainHandler', + 'Step9MlPredictHandler', + 'Step10QaaInversionHandler', + 'Step11ConcentrationHandler', + 'Step12KrigingHandler', + 'Step13VisualizationHandler', + 'Step14ReportHandler', ] diff --git a/src/core/handlers/base.py b/src/core/handlers/base.py index 4ffdaab..1bb6b62 100644 --- a/src/core/handlers/base.py +++ b/src/core/handlers/base.py @@ -74,6 +74,11 @@ class PipelineContext: self.training_csv_path: Optional[str] = None self.indices_path: Optional[str] = None self.custom_regression_path: Optional[str] = None + self.sampling_csv_path: Optional[str] = None + self.prediction_files: Dict[str, str] = {} + self.distribution_map_path: Optional[str] = None + self.qaa_output_path: Optional[str] = None + self.concentration_output_path: Optional[str] = None # ── 计时 ── self.step_timings: Dict[str, dict] = {} diff --git a/src/core/handlers/register_handlers.py b/src/core/handlers/register_handlers.py index 2793648..308a6d5 100644 --- a/src/core/handlers/register_handlers.py +++ b/src/core/handlers/register_handlers.py @@ -18,6 +18,13 @@ from src.core.handlers.step4_sampling import Step4SamplingHandler from src.core.handlers.step5_process_csv import Step5ProcessCsvHandler from src.core.handlers.step6_extract_spectra import Step6ExtractSpectraHandler from src.core.handlers.step7_calc_indices import Step7CalcIndicesHandler +from src.core.handlers.step8_ml_train import Step8MlTrainHandler +from src.core.handlers.step9_ml_predict import Step9MlPredictHandler +from src.core.handlers.step10_qaa_inversion import Step10QaaInversionHandler +from src.core.handlers.step11_concentration import Step11ConcentrationHandler +from src.core.handlers.step12_kriging import Step12KrigingHandler +from src.core.handlers.step13_visualization import Step13VisualizationHandler +from src.core.handlers.step14_report import Step14ReportHandler if TYPE_CHECKING: from src.core.handlers.pipeline_scheduler import PipelineScheduler @@ -41,3 +48,10 @@ def register_all_handlers(scheduler: PipelineScheduler): scheduler.register_handler(Step5ProcessCsvHandler()) scheduler.register_handler(Step6ExtractSpectraHandler()) scheduler.register_handler(Step7CalcIndicesHandler()) + scheduler.register_handler(Step8MlTrainHandler()) + scheduler.register_handler(Step9MlPredictHandler()) + scheduler.register_handler(Step10QaaInversionHandler()) + scheduler.register_handler(Step11ConcentrationHandler()) + scheduler.register_handler(Step12KrigingHandler()) + scheduler.register_handler(Step13VisualizationHandler()) + scheduler.register_handler(Step14ReportHandler()) diff --git a/src/core/handlers/step10_qaa_inversion.py b/src/core/handlers/step10_qaa_inversion.py new file mode 100644 index 0000000..b229f1e --- /dev/null +++ b/src/core/handlers/step10_qaa_inversion.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Step10 处理器:QAA 准解析算法反演 + +将原 WaterQualityInversionPipeline.step8_qaa_inversion() 方法 +剥离为独立的 Step10QaaInversionHandler。 +""" + +import os +import time +from typing import Any, Dict + +import numpy as np +import pandas as pd + +from src.core.handlers.base import BaseStepHandler, PipelineContext + + +class Step10QaaInversionHandler(BaseStepHandler): + """步骤10:QAA 准解析算法反演(非经验模型)。 + + 对应 config key: 'step10_qaa' + 直接使用 QAABaselineSolver 进行物理推导。 + """ + + step_key = 'step10_qaa' + + def execute(self, context: PipelineContext, config: dict) -> Dict[str, Any]: + from src.core.algorithms.qaa.qaas_baseline import QAABaselineSolver + from src.utils.water_owt_config import get_lambda_0 + + step_start_time = time.time() + + lake_name = config.get('lake_name', 'Unknown') + lambda_0 = config.get('lambda_0', get_lambda_0(lake_name)) + output_dir = os.path.join(context.work_dir, "10_QAA_Inversion") + os.makedirs(output_dir, exist_ok=True) + output_path = config.get('output_path') or os.path.join(output_dir, "a_lambda_results.csv") + + spectrum_csv = config.get('spectrum_csv_path') + if not spectrum_csv: + spectrum_csv = context.training_csv_path + if not spectrum_csv or not os.path.exists(spectrum_csv): + fallback_candidates = [] + step6_dir = os.path.join(context.work_dir, "6_Spectral_Feature_Extraction") + if os.path.isdir(step6_dir): + for f in sorted(os.listdir(step6_dir)): + if f.lower().endswith('.csv'): + fallback_candidates.append(os.path.join(step6_dir, f)) + if fallback_candidates: + spectrum_csv = fallback_candidates[0] + context.notify('step10_qaa', 'info', + f'spectrum_csv_path 为空,已自动回退到 step6 产物: {spectrum_csv}') + else: + msg = f'训练光谱 CSV 不存在或路径为空: {spectrum_csv}' + context.notify('step10_qaa', 'error', msg) + step_end_time = time.time() + context.record_step_time( + "步骤10: QAA 反演", step_start_time, step_end_time, + status="failed", error=msg + ) + return {'error': msg} + + try: + df = pd.read_csv(spectrum_csv, encoding="utf-8-sig") + col_names = df.columns.tolist() + + wavelength_col_idx = None + for i, col in enumerate(col_names): + try: + float(col) + wavelength_col_idx = i + break + except (ValueError, TypeError): + pass + + if wavelength_col_idx is None: + msg = "无法从 CSV 列名中识别波长信息" + context.notify('step10_qaa', 'error', msg) + step_end_time = time.time() + context.record_step_time( + "步骤10: QAA 反演", step_start_time, step_end_time, + status="failed", error=msg + ) + return {'error': msg} + + meta_df = df.iloc[:, :wavelength_col_idx].copy() + wavelengths = np.array([float(c) for c in col_names[wavelength_col_idx:]], dtype=np.float64) + data_matrix = df.iloc[:, wavelength_col_idx:].values.astype(np.float64) + if data_matrix.ndim == 1: + data_matrix = data_matrix[np.newaxis, :] + + solver = QAABaselineSolver() + raw_result = solver.run_inversion(wavelengths, data_matrix, lambda_0) + + if isinstance(raw_result, list): + sample_results = raw_result + else: + sample_results = [raw_result] + + rows_out = [] + for i, sample_result in enumerate(sample_results): + wl_arr = wavelengths + a_arr = sample_result['a_lambda'] + bb_arr = sample_result['bb_lambda'] + meta_row = meta_df.iloc[i].to_dict() if i < len(meta_df) else {} + for j, wl in enumerate(wl_arr): + rows_out.append({ + 'sample_id': f"sample_{i}", + 'Wavelength': wl, + 'a_lambda': a_arr[j], + 'bb_lambda': bb_arr[j], + **meta_row, + }) + + result_df = pd.DataFrame(rows_out) + result_df.to_csv(output_path, index=False, float_format='%.8f') + + context.qaa_output_path = output_path + + step_end_time = time.time() + context.record_step_time( + "步骤10: QAA 反演", step_start_time, step_end_time + ) + context.notify('step10_qaa', 'completed', + f"QAA 反演完毕,水域={lake_name},λ₀={lambda_0}nm") + + return {'qaa_output_path': output_path} + + except Exception as e: + step_end_time = time.time() + context.record_step_time( + "步骤10: QAA 反演", step_start_time, step_end_time, + status="failed", error=str(e) + ) + raise diff --git a/src/core/handlers/step11_concentration.py b/src/core/handlers/step11_concentration.py new file mode 100644 index 0000000..238b20d --- /dev/null +++ b/src/core/handlers/step11_concentration.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Step11 处理器:浓度反演 + +将原 WaterQualityInversionPipeline.step9_concentration_inversion() 方法 +剥离为独立的 Step11ConcentrationHandler。 +""" + +import os +import time +from typing import Any, Dict + +from src.core.handlers.base import BaseStepHandler, PipelineContext + + +class Step11ConcentrationHandler(BaseStepHandler): + """步骤11:浓度反演(基于 QAA Step10 输出的 a_lambda/bb_lambda)。 + + 对应 config key: 'step11_concentration' + 直接使用 ConcentrationPipeline 进行浓度反演。 + """ + + step_key = 'step11_concentration' + + def execute(self, context: PipelineContext, config: dict) -> Dict[str, Any]: + from src.core.algorithms.concentration_inversion import ConcentrationPipeline + + step_start_time = time.time() + + input_csv = config.get('input_csv') or context.qaa_output_path + output_csv = config.get('output_csv') + lake_case = config.get('lake_case', 'medium') + + if not input_csv or not os.path.exists(input_csv): + msg = f"QAA 结果文件不存在或路径为空: {input_csv}" + context.notify('step11_concentration', 'error', msg) + step_end_time = time.time() + context.record_step_time( + "步骤11: 浓度反演", step_start_time, step_end_time, + status="failed", error=msg + ) + return {'error': msg} + + if not output_csv: + output_dir = os.path.join(context.work_dir, "11_Concentration") + os.makedirs(output_dir, exist_ok=True) + output_csv = os.path.join(output_dir, "final_concentrations.csv") + + try: + pipeline = ConcentrationPipeline(lake_case=lake_case) + result_csv = pipeline.run_pipeline(input_csv, output_csv) + + context.concentration_output_path = result_csv + + step_end_time = time.time() + context.record_step_time( + "步骤11: 浓度反演", step_start_time, step_end_time + ) + context.notify('step11_concentration', 'completed', + f"浓度反演完毕,结果保存于: {result_csv}") + + return {'concentration_output_path': result_csv} + + except Exception as e: + step_end_time = time.time() + context.record_step_time( + "步骤11: 浓度反演", step_start_time, step_end_time, + status="failed", error=str(e) + ) + raise diff --git a/src/core/handlers/step12_kriging.py b/src/core/handlers/step12_kriging.py new file mode 100644 index 0000000..d3b8807 --- /dev/null +++ b/src/core/handlers/step12_kriging.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Step12 处理器:克里金空间插值与分布图生成 + +将原 WaterQualityInversionPipeline.step10_map() 方法 +剥离为独立的 Step12KrigingHandler。 +""" + +import time +from pathlib import Path +from typing import Any, Dict + +from src.core.handlers.base import BaseStepHandler, PipelineContext +from src.core.steps.mapping_step import MappingStep + + +class Step12KrigingHandler(BaseStepHandler): + """步骤12:克里金空间插值与分布图生成。 + + 对应 config key: 'step12_kriging' + 委托类: MappingStep.generate_distribution_map() + """ + + step_key = 'step12_kriging' + + def execute(self, context: PipelineContext, config: dict) -> Dict[str, Any]: + step_start_time = time.time() + + prediction_csv_path = config.get('prediction_csv_path') + boundary_shp_path = config.get('boundary_shp_path') + + # 强制输出到 visualization_dir + csv_name = Path(prediction_csv_path).stem if prediction_csv_path else "distribution" + forced_image_path = str(context.visualization_dir / f"{csv_name}_distribution.png") + viz_dir_resolved = str(context.visualization_dir) + + output_image_path = config.get('output_image_path') + if output_image_path and output_image_path != forced_image_path: + norm_user = output_image_path.replace('\\', '/').rstrip('/') + norm_viz = viz_dir_resolved.replace('\\', '/').rstrip('/') + if not norm_user.startswith(norm_viz + '/') and norm_user != norm_viz: + output_image_path = forced_image_path + else: + output_image_path = forced_image_path + + try: + result = MappingStep.generate_distribution_map( + prediction_csv_path=prediction_csv_path, + boundary_shp_path=boundary_shp_path, + output_image_path=output_image_path, + resolution=config.get('resolution', 30), + input_crs=config.get('input_crs', 'EPSG:32651'), + output_crs=config.get('output_crs', 'EPSG:4326'), + show_sample_points=config.get('show_sample_points', False), + base_map_tif=config.get('base_map_tif'), + use_distance_diffusion=config.get('use_distance_diffusion', True), + max_diffusion_distance=config.get('max_diffusion_distance'), + diffusion_power=config.get('diffusion_power', 2), + diffusion_n_neighbors=config.get('diffusion_n_neighbors', 15), + cmap=config.get('cmap'), + expand_ratio=config.get('expand_ratio', 0.05), + output_dir=str(context.visualization_dir), + ) + + context.distribution_map_path = result + + step_end_time = time.time() + context.record_step_time( + "步骤12: 克里金插值与分布图", step_start_time, step_end_time + ) + + return {'distribution_map_path': result} + + except Exception as e: + step_end_time = time.time() + context.record_step_time( + "步骤12: 克里金插值与分布图", step_start_time, step_end_time, + status="failed", error=str(e) + ) + raise diff --git a/src/core/handlers/step13_visualization.py b/src/core/handlers/step13_visualization.py new file mode 100644 index 0000000..5728795 --- /dev/null +++ b/src/core/handlers/step13_visualization.py @@ -0,0 +1,349 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Step13 处理器:可视化成图 + +将原 WaterQualityInversionPipeline 中的可视化方法 +(散点图、箱型图、光谱曲线、统计图表、耀斑预览) +剥离为独立的 Step13VisualizationHandler。 +""" + +import time +from pathlib import Path +from typing import Any, Dict, List, Optional + +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns + +from src.core.handlers.base import BaseStepHandler, PipelineContext + + +class Step13VisualizationHandler(BaseStepHandler): + """步骤13:可视化成图。 + + 对应 config key: 'step13_visualization' + 包含:散点图、箱型图、光谱曲线、统计图表、耀斑预览。 + """ + + step_key = 'step13_visualization' + + def execute(self, context: PipelineContext, config: dict) -> Dict[str, Any]: + step_start_time = time.time() + output_files: Dict[str, Any] = {} + + try: + # ── 散点图 ── + if config.get('generate_scatter', True): + if context.training_csv_path and context.models_dir.exists(): + try: + scatter_config = config.get('scatter_config', {}) + scatter_paths = self._generate_scatter_plots(context, scatter_config) + output_files['scatter_plots'] = scatter_paths + except Exception as e: + context.notify('step13_visualization', 'warning', + f"生成散点图时出错: {e}") + + # ── 箱型图 ── + if config.get('generate_boxplots', True): + if context.processed_csv_path: + try: + boxplot_config = config.get('boxplot_config', {}) + boxplot_paths = self._generate_boxplots(context, boxplot_config) + output_files['boxplots'] = boxplot_paths + except Exception as e: + context.notify('step13_visualization', 'warning', + f"生成箱型图时出错: {e}") + + # ── 光谱曲线 ── + if config.get('generate_spectrum', True): + if context.training_csv_path: + try: + spectrum_paths = self._generate_spectrum_plots(context, config) + output_files['spectrum_plots'] = spectrum_paths + except Exception as e: + context.notify('step13_visualization', 'warning', + f"生成光谱曲线图时出错: {e}") + + # ── 统计图表 ── + if config.get('generate_statistics', True): + if context.processed_csv_path: + try: + stat_charts = self._generate_statistics(context) + output_files['statistical_charts'] = stat_charts + except Exception as e: + context.notify('step13_visualization', 'warning', + f"生成统计图表时出错: {e}") + + # ── 耀斑预览 ── + if config.get('generate_glint_previews', True): + try: + glint_config = config.get('glint_preview_config', {}) + preview_paths = context.visualizer.generate_glint_deglint_previews( + work_dir=glint_config.get('work_dir') or str(context.work_dir), + output_subdir=glint_config.get('output_subdir', 'glint_deglint_previews'), + generate_glint=glint_config.get('generate_glint', True), + generate_deglint=glint_config.get('generate_deglint', True), + ) + output_files['glint_deglint_previews'] = preview_paths + except Exception as e: + context.notify('step13_visualization', 'warning', + f"生成耀斑预览图时出错: {e}") + + step_end_time = time.time() + context.record_step_time( + "步骤13: 可视化成图", step_start_time, step_end_time + ) + + return {'visualization_outputs': output_files} + + except Exception as e: + step_end_time = time.time() + context.record_step_time( + "步骤13: 可视化成图", step_start_time, step_end_time, + status="failed", error=str(e) + ) + raise + + # ── 散点图 ── + + def _generate_scatter_plots(self, context: PipelineContext, + scatter_config: dict) -> Dict[str, str]: + training_csv_path = context.training_csv_path + models_dir = str(context.models_dir) + metric = scatter_config.get('metric', 'test_r2') + use_enhanced = scatter_config.get('use_enhanced', True) + feature_start_column = scatter_config.get('feature_start_column', 13) + test_size = scatter_config.get('test_size', 0.2) + random_state = scatter_config.get('random_state', 42) + + scatter_paths = {} + + if use_enhanced: + try: + results = context.scatter_batch.batch_plot_scatter( + models_root_dir=models_dir, + csv_path=training_csv_path, + output_dir=str(context.visualization_dir / "scatter_plots"), + metric=metric, + target_column=None, + feature_start_column=feature_start_column, + test_size=test_size, + random_state=random_state, + ) + for target_name, result in results.items(): + if result.get('status') == 'success': + scatter_paths[target_name] = result.get('save_path', '') + except Exception: + use_enhanced = False + + if not use_enhanced or not scatter_paths: + from src.core.prediction.inference_batch import WaterQualityInference + models_path = Path(models_dir) + for target_folder in models_path.iterdir(): + if not target_folder.is_dir(): + continue + target_name = target_folder.name + try: + inferencer = WaterQualityInference(str(target_folder)) + eval_result = inferencer.evaluate_with_split( + data_csv_path=training_csv_path, + split_method="spxy", + test_size=test_size, + random_state=random_state, + metric=metric, + ) + predictions = eval_result.get('predictions', {}) + if predictions: + y_train_true = predictions.get('y_train_true') + y_train_pred = predictions.get('y_train_pred') + y_test_true = predictions.get('y_test_true') + y_test_pred = predictions.get('y_test_pred') + metrics = eval_result.get('test_metrics', {}) + if y_train_true is not None and y_test_true is not None: + y_all_true = np.concatenate([y_train_true, y_test_true]) + y_all_pred = np.concatenate([y_train_pred, y_test_pred]) + train_indices = np.arange(len(y_train_true)) + test_indices = np.arange(len(y_train_true), len(y_all_true)) + scatter_path = context.visualizer.plot_scatter_true_vs_pred( + y_true=y_all_true, + y_pred=y_all_pred, + target_name=target_name, + train_indices=train_indices, + test_indices=test_indices, + metrics={ + 'train_r2': eval_result.get('train_metrics', {}).get('r2', 0), + 'test_r2': metrics.get('r2', 0), + 'train_rmse': eval_result.get('train_metrics', {}).get('rmse', 0), + 'test_rmse': metrics.get('rmse', 0), + } + ) + scatter_paths[target_name] = scatter_path + except Exception: + continue + + return scatter_paths + + # ── 箱型图 ── + + def _generate_boxplots(self, context: PipelineContext, + boxplot_config: dict) -> Dict[str, str]: + csv_path = context.processed_csv_path + parameter_columns = boxplot_config.get('parameter_columns') + data_start_column = boxplot_config.get('data_start_column', 4) + save_individual = boxplot_config.get('save_individual', True) + use_seaborn = boxplot_config.get('use_seaborn', True) + + df = pd.read_csv(csv_path) + + if parameter_columns is None: + data_columns = df.iloc[:, data_start_column:] + parameter_columns = list(data_columns.columns) + else: + parameter_columns = [col for col in parameter_columns if col in df.columns] + + if not parameter_columns: + return {} + + boxplot_dir = context.visualization_dir / "boxplots" + boxplot_dir.mkdir(parents=True, exist_ok=True) + boxplot_paths = {} + + if save_individual: + for column in parameter_columns: + if column not in df.columns: + continue + clean_data = df[column].dropna() + if len(clean_data) == 0: + continue + try: + plt.figure(figsize=(8, 6)) + if use_seaborn: + plot_data = pd.DataFrame({'参数': [column] * len(clean_data), '数值': clean_data}) + sns.boxplot(data=plot_data, x='参数', y='数值', palette='Set2') + sns.stripplot(data=plot_data, x='参数', y='数值', + color='red', alpha=0.6, size=5, jitter=True) + else: + box_plot = plt.boxplot([clean_data], labels=[column], + patch_artist=True, showfliers=False) + box_plot['boxes'][0].set_facecolor('lightblue') + box_plot['boxes'][0].set_alpha(0.7) + x_pos = np.random.normal(1, 0.04, size=len(clean_data)) + plt.scatter(x_pos, clean_data, alpha=0.6, s=30, color='red', + edgecolors='black', linewidth=0.5, zorder=3) + plt.title(f'{column} - 箱型图', fontsize=14, fontweight='bold') + plt.xlabel('参数', fontsize=12) + plt.ylabel('数值', fontsize=12) + stats_text = (f'数据点数: {len(clean_data)}\n' + f'均值: {clean_data.mean():.2f}\n' + f'中位数: {clean_data.median():.2f}\n' + f'标准差: {clean_data.std():.2f}') + plt.text(0.02, 0.98, stats_text, transform=plt.gca().transAxes, + verticalalignment='top', + bbox=dict(boxstyle='round', + facecolor='wheat' if not use_seaborn else 'lightgreen', + alpha=0.8)) + plt.grid(True, alpha=0.3, linestyle='--') + plt.tight_layout() + safe_name = column.replace('/', '_').replace('\\', '_').replace(':', '_') + save_path = boxplot_dir / f'{safe_name}_boxplot.png' + plt.savefig(save_path, dpi=300, bbox_inches='tight') + plt.close() + boxplot_paths[column] = str(save_path) + except Exception: + continue + + # 综合箱型图 + try: + plt.figure(figsize=(max(12, len(parameter_columns) * 0.8), 8)) + box_data = [] + labels = [] + for column in parameter_columns: + if column in df.columns: + clean_data = df[column].dropna() + if len(clean_data) > 0: + box_data.append(clean_data) + labels.append(column) + if box_data: + if use_seaborn: + melted_data = pd.melt(df[labels], var_name='参数', value_name='数值') + melted_data = melted_data.dropna() + sns.boxplot(data=melted_data, x='参数', y='数值', palette='Set3') + sns.stripplot(data=melted_data, x='参数', y='数值', + color='red', alpha=0.6, size=4, jitter=True) + else: + box_plot = plt.boxplot(box_data, labels=labels, patch_artist=True, showfliers=False) + colors = plt.cm.Set3(np.linspace(0, 1, len(box_data))) + for patch, color in zip(box_plot['boxes'], colors): + patch.set_facecolor(color) + patch.set_alpha(0.7) + for i, data in enumerate(box_data): + x_pos = np.random.normal(i + 1, 0.04, size=len(data)) + plt.scatter(x_pos, data, alpha=0.6, s=20, color='red', + edgecolors='black', linewidth=0.5, zorder=3) + plt.title('水质参数箱型图(综合)', fontsize=16, fontweight='bold') + plt.xlabel('参数', fontsize=12) + plt.ylabel('数值', fontsize=12) + plt.xticks(rotation=45, ha='right') + plt.grid(True, alpha=0.3, linestyle='--') + plt.tight_layout() + combined_path = boxplot_dir / 'all_parameters_boxplot.png' + plt.savefig(combined_path, dpi=300, bbox_inches='tight') + plt.close() + boxplot_paths['all_parameters'] = str(combined_path) + except Exception: + pass + + return boxplot_paths + + # ── 光谱曲线 ── + + def _generate_spectrum_plots(self, context: PipelineContext, + config: dict) -> Dict[str, str]: + csv_path = context.training_csv_path + wavelength_start_column = config.get('feature_start_column', 'UTM_Y') + + df = pd.read_csv(csv_path) + if isinstance(wavelength_start_column, str): + try: + wavelength_start_idx = df.columns.get_loc(wavelength_start_column) + except KeyError: + wavelength_start_idx = 13 + else: + wavelength_start_idx = wavelength_start_column + + parameter_columns = list(df.columns[:wavelength_start_idx]) + if len(parameter_columns) > 2: + parameter_columns = parameter_columns[2:] + + spectrum_paths = {} + for param_col in parameter_columns: + if param_col not in df.columns: + continue + try: + spectrum_path = context.visualizer.plot_spectrum_by_parameter( + csv_path=csv_path, + parameter_column=param_col, + wavelength_start_column=wavelength_start_column, + n_groups=5, + ) + spectrum_paths[param_col] = spectrum_path + except Exception: + continue + + return spectrum_paths + + # ── 统计图表 ── + + def _generate_statistics(self, context: PipelineContext) -> Dict[str, str]: + csv_path = context.processed_csv_path + df = pd.read_csv(csv_path) + parameter_columns = list(df.columns[2:]) + parameter_columns = [col for col in parameter_columns + if df[col].dtype in [np.float64, np.int64]] + + return context.visualizer.plot_statistical_charts( + csv_path=csv_path, + parameter_columns=parameter_columns, + ) diff --git a/src/core/handlers/step14_report.py b/src/core/handlers/step14_report.py new file mode 100644 index 0000000..3cd2ccd --- /dev/null +++ b/src/core/handlers/step14_report.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Step14 处理器:报告生成 + +将原 WaterQualityInversionPipeline.generate_pipeline_report() 方法 +剥离为独立的 Step14ReportHandler。 +""" + +import time +from datetime import datetime +from pathlib import Path +from typing import Any, Dict + +import numpy as np +import pandas as pd + +from src.core.handlers.base import BaseStepHandler, PipelineContext + + +class Step14ReportHandler(BaseStepHandler): + """步骤14:流程执行报告生成。 + + 对应 config key: 'step14_report' + 生成 CSV 和 TXT 格式的流程执行报告。 + """ + + step_key = 'step14_report' + + def execute(self, context: PipelineContext, config: dict) -> Dict[str, Any]: + step_start_time = time.time() + + try: + output_path = config.get('output_path') + if output_path is None: + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + output_path = str(context.reports_dir / f"pipeline_report_{timestamp}.csv") + + report_data = [] + total_time = 0.0 + + step_order = [ + "步骤1: 水域掩膜生成", + "步骤2: 耀斑区域检测", + "步骤3: 耀斑去除", + "步骤4: 数据预处理", + "步骤5: 光谱提取", + "步骤6: 水质光谱指数计算", + "步骤7: 机器学习建模与训练", + "步骤8: 非经验模型训练", + "步骤9: 自定义回归", + "步骤10: 采样点生成", + "步骤11: 参数预测", + "步骤12: 分布图生成", + ] + + for step_name in step_order: + if step_name in context.step_timings: + timing_info = context.step_timings[step_name] + report_data.append({ + '步骤': step_name, + '开始时间': timing_info['start_time'], + '结束时间': timing_info['end_time'], + '耗时(秒)': f"{timing_info['elapsed_seconds']:.2f}", + '耗时(格式化)': timing_info['elapsed_formatted'], + '状态': timing_info['status'], + '错误信息': timing_info.get('error', '') + }) + if timing_info['status'] == 'completed': + total_time += timing_info['elapsed_seconds'] + + if context.pipeline_start_time and context.pipeline_end_time: + pipeline_total = context.pipeline_end_time - context.pipeline_start_time + report_data.append({ + '步骤': '总计', + '开始时间': datetime.fromtimestamp(context.pipeline_start_time).strftime('%Y-%m-%d %H:%M:%S'), + '结束时间': datetime.fromtimestamp(context.pipeline_end_time).strftime('%Y-%m-%d %H:%M:%S'), + '耗时(秒)': f"{pipeline_total:.2f}", + '耗时(格式化)': context._format_time(pipeline_total), + '状态': 'completed', + '错误信息': '' + }) + + df_report = pd.DataFrame(report_data) + df_report.to_csv(output_path, index=False, encoding='utf-8-sig') + + txt_output_path = str(Path(output_path).with_suffix('.txt')) + with open(txt_output_path, 'w', encoding='utf-8') as f: + f.write("=" * 80 + "\n") + f.write("水质参数反演流程执行报告\n") + f.write("=" * 80 + "\n\n") + + if context.pipeline_start_time and context.pipeline_end_time: + f.write(f"流程开始时间: {datetime.fromtimestamp(context.pipeline_start_time).strftime('%Y-%m-%d %H:%M:%S')}\n") + f.write(f"流程结束时间: {datetime.fromtimestamp(context.pipeline_end_time).strftime('%Y-%m-%d %H:%M:%S')}\n") + f.write(f"总耗时: {context._format_time(context.pipeline_end_time - context.pipeline_start_time)}\n\n") + + f.write("-" * 80 + "\n") + f.write("各步骤执行详情:\n") + f.write("-" * 80 + "\n\n") + + for step_name in step_order: + if step_name in context.step_timings: + timing_info = context.step_timings[step_name] + f.write(f"{step_name}\n") + f.write(f" 开始时间: {timing_info['start_time']}\n") + f.write(f" 结束时间: {timing_info['end_time']}\n") + f.write(f" 耗时: {timing_info['elapsed_formatted']} ({timing_info['elapsed_seconds']:.2f}秒)\n") + f.write(f" 状态: {timing_info['status']}\n") + if timing_info.get('error'): + f.write(f" 错误: {timing_info['error']}\n") + f.write("\n") + + f.write("-" * 80 + "\n") + f.write("统计摘要:\n") + f.write("-" * 80 + "\n") + completed_steps = [s for s in context.step_timings.values() if s['status'] == 'completed'] + failed_steps = [s for s in context.step_timings.values() if s['status'] == 'failed'] + skipped_steps = [s for s in context.step_timings.values() if s['status'] == 'skipped'] + f.write(f"成功完成的步骤: {len(completed_steps)}\n") + f.write(f"失败的步骤: {len(failed_steps)}\n") + f.write(f"跳过的步骤: {len(skipped_steps)}\n") + if completed_steps: + completed_times = [s['elapsed_seconds'] for s in completed_steps] + f.write(f"平均耗时: {context._format_time(np.mean(completed_times))}\n") + f.write(f"最长耗时: {context._format_time(np.max(completed_times))}\n") + f.write(f"最短耗时: {context._format_time(np.min(completed_times))}\n") + + step_end_time = time.time() + context.record_step_time( + "步骤14: 报告生成", step_start_time, step_end_time + ) + + return {'report_csv': output_path, 'report_txt': txt_output_path} + + except Exception as e: + step_end_time = time.time() + context.record_step_time( + "步骤14: 报告生成", step_start_time, step_end_time, + status="failed", error=str(e) + ) + raise diff --git a/src/core/handlers/step8_ml_train.py b/src/core/handlers/step8_ml_train.py new file mode 100644 index 0000000..4a291b2 --- /dev/null +++ b/src/core/handlers/step8_ml_train.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Step8 处理器:机器学习建模与训练 + +将原 WaterQualityInversionPipeline.step8_train_ml() 方法 +剥离为独立的 Step8MlTrainHandler。 +""" + +import time +from typing import Any, Dict + +from src.core.handlers.base import BaseStepHandler, PipelineContext +from src.core.steps.modeling_step import ModelingStep + + +class Step8MlTrainHandler(BaseStepHandler): + """步骤8:机器学习建模与训练。 + + 对应 config key: 'step8_ml_train' + 委托类: ModelingStep.train_models() + """ + + step_key = 'step8_ml_train' + + def execute(self, context: PipelineContext, config: dict) -> Dict[str, Any]: + step_start_time = time.time() + + training_csv_path = self._resolve_path( + config.get('training_csv_path'), context.training_csv_path, 'training_csv' + ) + + try: + result = ModelingStep.train_models( + feature_start_column=config.get('feature_start_column', '374.285004'), + preprocessing_methods=config.get('preprocessing_methods'), + model_names=config.get('model_names'), + split_methods=config.get('split_methods'), + cv_folds=config.get('cv_folds', 5), + training_csv_path=training_csv_path, + output_dir=str(context.models_dir), + _report_generator=context.report_generator, + ) + + step_end_time = time.time() + context.record_step_time( + "步骤8: 机器学习建模与训练", step_start_time, step_end_time + ) + + return {'models_dir': result} + + except Exception as e: + step_end_time = time.time() + context.record_step_time( + "步骤8: 机器学习建模与训练", step_start_time, step_end_time, + status="failed", error=str(e) + ) + raise diff --git a/src/core/handlers/step9_ml_predict.py b/src/core/handlers/step9_ml_predict.py new file mode 100644 index 0000000..fd4b4cb --- /dev/null +++ b/src/core/handlers/step9_ml_predict.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Step9 处理器:机器学习推理预测 + +将原 WaterQualityInversionPipeline.step9_predict_ml() 方法 +剥离为独立的 Step9MlPredictHandler。 +""" + +import time +from typing import Any, Dict + +from src.core.handlers.base import BaseStepHandler, PipelineContext +from src.core.steps.prediction_step import PredictionStep + + +class Step9MlPredictHandler(BaseStepHandler): + """步骤9:机器学习推理预测。 + + 对应 config key: 'step9_ml_predict' + 委托类: PredictionStep.predict_water_quality() + """ + + step_key = 'step9_ml_predict' + + def execute(self, context: PipelineContext, config: dict) -> Dict[str, Any]: + step_start_time = time.time() + + sampling_csv_path = self._resolve_path( + config.get('sampling_csv_path'), context.sampling_csv_path, 'sampling_csv' + ) + + models_dir = config.get('models_dir') or str(context.models_dir) + + try: + result = PredictionStep.predict_water_quality( + sampling_csv_path=sampling_csv_path, + models_dir=models_dir, + metric=config.get('metric', 'test_r2'), + prediction_column=config.get('prediction_column', 'prediction'), + output_dir=str(context.prediction_dir / "9_ML_Prediction"), + _report_generator=context.report_generator, + _external_model=config.get('_external_model'), + _external_model_path=config.get('_external_model_path'), + _external_models_dict=config.get('_external_models_dict'), + _external_model_dir=config.get('_external_model_dir'), + ) + + context.prediction_files.update(result) + + step_end_time = time.time() + context.record_step_time( + "步骤9: 机器学习推理预测", step_start_time, step_end_time + ) + + return {'prediction_files': result} + + except Exception as e: + step_end_time = time.time() + context.record_step_time( + "步骤9: 机器学习推理预测", step_start_time, step_end_time, + status="failed", error=str(e) + ) + raise diff --git a/src/core/water_quality_inversion_pipeline_GUI.py b/src/core/water_quality_inversion_pipeline_GUI.py deleted file mode 100644 index 4254f27..0000000 --- a/src/core/water_quality_inversion_pipeline_GUI.py +++ /dev/null @@ -1,2597 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -水质参数反演框架主程序 - -本程序串联了水质参数反演的所有步骤: -1. 生成水域mask(基于shp或NDWI阈值分割) -2. 找到耀斑区域(支持多种算法:otsu、zscore、percentile、iqr、adaptive、multi_band) -3. 去除耀斑(支持七种方法:subtract_nir、regression_slope、oxygen_absorption、kutser、goodman、hedley、sugar) -4. 对csv文件进行处理,筛选剔除异常值 -5. 根据csv文件的采样点坐标,在去除耀斑的文件中统计采样点的平均光谱 -6. 使用采样点的平均光谱和对应的实测值建立机器学习模型,保存模型权重 -7. 生成根据水域掩膜内且耀斑掩膜外的采样点,统计采样点的平均光谱 -8. 将训练好的最佳机器学习模型应用到采样点的平均光谱上,预测水质参数 -9. 根据采样点的坐标和反演的实测参数,以及水域掩膜,通过插值的方法,得到水质参数的可视化分布图 - -重要特性: -- 每一步都可以独立运行,不受限制 -- 通过设置 skip_dependency_check=True 可以跳过依赖检查 -- 支持灵活的参数传递,允许用户自定义输入输出路径 - -可视化功能: -- 生成模型评估散点图(真实值vs预测值,支持带置信区间的增强版) -- 生成水质参数箱型图(支持单独和综合两种模式) -- 生成光谱曲线对比图 -- 生成统计图表(箱线图、直方图、相关性热力图) -""" - -import os -import sys -import argparse -from pathlib import Path -from typing import Optional, Dict, List, Union -import numpy as np -import pandas as pd -import warnings -import time -from datetime import datetime -import json -warnings.filterwarnings('ignore') - -# 导入各个功能模块 -from src.utils.extract_water_area import rasterize_shp, ndwi -from src.utils.find_severe_glint_area import find_severe_glint_area -from src.preprocessing.process_water_quality_data import process_water_quality_data -from src.core.glint_removal.get_spectral import get_spectral_in_coor -from src.core.modeling.modeling_batch import WaterQualityModelingBatch -from src.utils.sampling import get_spectral_sampling_points_chunked -from src.core.prediction.inference_batch import WaterQualityInference -from src.utils.kriging import KrigingInterpolator, batch_kriging_interpolation -from src.postprocessing.map import ContentMapper -from src.postprocessing.visualization_reports import WaterQualityVisualization, ReportGenerator -from src.core.prediction.sctter_batch import WaterQualityScatterBatch -# 导入底层工具模块(从 utils/ 目录迁移) -from src.core.utils.gdal_helper import ( - get_image_geo_info as _get_image_geo_info, - load_image_as_array as _load_image_as_array, - save_array_as_image as _save_array_as_image, - save_bands_as_image as _save_bands_as_image, - copy_hdr_info as _copy_hdr_info, -) -from src.core.utils.mask_converter import ( - prepare_water_mask_for_algorithm as _prepare_water_mask_for_algorithm, - ensure_water_mask_dat as _ensure_water_mask_dat, -) -from src.core.utils.preview_generator import ( - generate_image_preview as _generate_image_preview, - generate_water_mask_overlay as _generate_water_mask_overlay, - select_rgb_bands_by_wavelength as _select_rgb_bands_by_wavelength, - get_wavelength_info as _get_wavelength_info, -) -# 导入算法层模块 -from src.core.algorithms.interpolation.interpolator import interpolate_zero_pixels_batch as _interpolate_zero_pixels_batch -# 导入业务步骤模块 -from src.core.steps.water_mask_step import WaterMaskStep -from src.core.steps.glint_detection_step import GlintDetectionStep -from src.core.steps.glint_removal_step import GlintRemovalStep -from src.core.steps.data_preparation_step import DataPreparationStep -from src.core.steps.modeling_step import ModelingStep -from src.core.steps.prediction_step import PredictionStep -from src.core.steps.mapping_step import MappingStep -# 导入新的耀斑去除算法 -from src.core.glint_removal.Kutser import Kutser -from src.core.glint_removal.Goodman import Goodman -from src.core.glint_removal.Hedley import Hedley -from src.core.glint_removal.SUGAR import SUGAR, correction_iterative -from src.core.modeling.regression import SingleVariableRegressionAnalysis -# 导入新的自定义回归预测模块 -from src.core.prediction.custom_regression_prediction import CustomRegressionPredictor -# 导入hdr文件处理函数 -try: - from src.utils.util import write_fields_to_hdrfile, get_hdr_file_path, find_band_number - UTIL_AVAILABLE = True - FIND_BAND_AVAILABLE = True -except ImportError: - UTIL_AVAILABLE = False - FIND_BAND_AVAILABLE = False - print("警告: util模块未导入,hdr文件信息复制功能可能无法正常工作") -import matplotlib.pyplot as plt -import seaborn as sns -# 导入插值相关库 -try: - from scipy import ndimage - from scipy.interpolate import griddata, RBFInterpolator - SCIPY_AVAILABLE = True -except ImportError: - SCIPY_AVAILABLE = False - print("警告: scipy未安装,0值像素插值功能可能无法正常工作") -# 导入GDAL用于影像读写 -try: - from osgeo import gdal, ogr - GDAL_AVAILABLE = True - # 注册所有 GDAL/OGR 驱动,确保 ESRI Shapefile 驱动可用 - gdal.AllRegister() - ogr.RegisterAll() - # 启用 GDAL/OGR 异常,使错误以 Python 异常形式抛出(而不是静默失败) - gdal.UseExceptions() - ogr.UseExceptions() -except ImportError: - GDAL_AVAILABLE = False - print("警告: GDAL未安装,新算法可能无法正常工作") - - -class WaterQualityInversionPipeline: - """水质参数反演管道类""" - - def __init__(self, work_dir: str = "./work_dir"): - """ - 初始化管道 - - Args: - work_dir: 工作目录,用于保存所有中间结果 - """ - self.work_dir = Path(work_dir) - self.work_dir.mkdir(parents=True, exist_ok=True) - - # 创建子目录 - self.water_mask_dir = self.work_dir / "1_water_mask" - self.glint_dir = self.work_dir / "2_Glint_Detection" - self.deglint_dir = self.work_dir / "3_deglint" - self.processed_data_dir = self.work_dir / "5_Data_Cleaning" - self.training_spectra_dir = self.work_dir / "6_Spectral_Feature_Extraction" - self.indices_dir = self.work_dir / "7_Water_Quality_Indices" - self.models_dir = self.work_dir / "8_Supervised_Model_Training" - self.non_empirical_models_dir = self.work_dir / "8_Non_Empirical_Regression" - self.custom_regression_dir = self.work_dir / "13_Custom_Regression" - self.sampling_dir = self.work_dir / "4_sampling" - self.prediction_dir = self.work_dir / "11_12_13_predictions" - self.visualization_dir = self.work_dir / "14_visualization" - self.reports_dir = self.work_dir / "reports" - - # 创建所有子目录 - for dir_path in [self.water_mask_dir, self.glint_dir, self.deglint_dir, - self.processed_data_dir, self.training_spectra_dir, - self.indices_dir, self.models_dir, self.non_empirical_models_dir, - self.custom_regression_dir, self.sampling_dir, self.prediction_dir, - self.visualization_dir, self.reports_dir]: - dir_path.mkdir(parents=True, exist_ok=True) - - # 初始化可视化和报告生成器 - self.visualizer = WaterQualityVisualization(str(self.visualization_dir)) - self.report_generator = ReportGenerator(str(self.reports_dir)) - self.scatter_batch = WaterQualityScatterBatch() - - # 设置中文字体 - plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans', 'Arial Unicode MS'] - plt.rcParams['axes.unicode_minus'] = False - - # 存储中间结果路径 - self.water_mask_path = None # 存储dat格式的水体掩膜路径(统一格式) - self.glint_mask_path = None - self.interpolated_img_path = None # 存储插值后的影像路径 - self.deglint_img_path = None - self.processed_csv_path = None - self.training_csv_path = None - self.indices_path = None - self.custom_regression_path = None - - # 存储每步的执行时间 - self.step_timings = {} - self.pipeline_start_time = None - self.pipeline_end_time = None - - # 回调函数(用于GUI更新) - self.callback = None - - print(f"工作目录已创建: {self.work_dir}") - - # ---- 步骤输出目录查找接口(归一化所有 panel 的路径访问)---- - - # 用户口语编号 → 权威子目录对象 的映射 - # 同时支持 "stepN"、"stepN_alias"、"subdir名" 三种 key 形式查找 - _STEP_OUTPUT_DIR_MAP = None # 延迟到首次访问时构造 - - def _ensure_step_dir_map(self): - """延迟构造 step_name → 目录对象 映射表(首次访问时执行)""" - if WaterQualityInversionPipeline._STEP_OUTPUT_DIR_MAP is not None: - return WaterQualityInversionPipeline._STEP_OUTPUT_DIR_MAP - wp = self.work_dir - m = { - # 基础步骤 - "step1": wp / "1_water_mask", - "step2": wp / "2_Glint_Detection", - "step3": wp / "3_deglint", - "step4_sampling": wp / "4_sampling", - "step5_clean": wp / "5_Data_Cleaning", - "step6_feature": wp / "6_Spectral_Feature_Extraction", - "step7_index": wp / "7_Water_Quality_Indices", - "step8_ml_train": wp / "8_Supervised_Model_Training", - "step9_ml_predict": wp / "8_Non_Empirical_Regression", - "step10_watercolor": wp / "10_WaterIndex_Images", - "step11_map": wp / "14_visualization", - "step12_viz": wp / "14_visualization", - "step13_report": wp / "14_visualization", - # 合并目录(提供单一访问点,避免分散硬编码) - "step11_predictions": wp / "11_12_13_predictions", - "step12_predictions": wp / "11_12_13_predictions", - "step13_predictions": wp / "11_12_13_predictions", - "custom_regression": wp / "13_Custom_Regression", - "prediction_dir": wp / "11_12_13_predictions", - "visualization": wp / "14_visualization", - "reports": wp / "reports", - # 兼容主流程 step_id(数字+短名) - "step8": wp / "8_Supervised_Model_Training", - "step9": wp / "8_Non_Empirical_Regression", - "step10": wp / "10_WaterIndex_Images", - "step11": wp / "11_12_13_predictions", - "step12": wp / "13_Custom_Regression", - "step13": wp / "reports", - "step14": wp / "14_visualization", - } - WaterQualityInversionPipeline._STEP_OUTPUT_DIR_MAP = m - return m - - def get_step_output_dir(self, step_name: str): - """根据步骤名称返回权威输出目录 Path 对象。 - - 这是 panel 端访问子目录的**唯一**入口。接收以下任意形式 key: - - 完整 panel 属性名: "step11_map", "step12_viz", "step8_ml_train" 等 - - 主流程 step_id: "step8"~"step14" - - 业务别名: "prediction_dir", "visualization", "reports", "custom_regression" - - 兼容口语: "step11_predictions" (=11_12_13_predictions) - - 未知 key 一律回退到 work_dir 本身,并打 warning。 - """ - mapping = self._ensure_step_dir_map() - key = (step_name or "").strip() - if key in mapping: - return mapping[key] - print(f"[pipeline.get_step_output_dir] 未知 step_name={key!r},回退到 work_dir") - return self.work_dir - - def set_callback(self, callback): - """ - 设置回调函数,用于向GUI报告进度 - - Args: - callback: 回调函数,签名为 callback(step_name, status, message="") - status可以是: 'start', 'completed', 'skipped', 'error', 'info', 'warning' - """ - self.callback = callback - - def _notify(self, step_name: str, status: str, message: str = ""): - """通知回调函数""" - if self.callback: - try: - self.callback(step_name, status, message) - except Exception as e: - print(f"回调函数执行失败: {e}") - - def _record_step_time(self, step_name: str, start_time: float, end_time: float, - status: str = "completed", error: Optional[str] = None): - """ - 记录步骤执行时间 - - Args: - step_name: 步骤名称 - start_time: 开始时间(时间戳) - end_time: 结束时间(时间戳) - status: 状态("completed", "failed", "skipped") - error: 错误信息(如果有) - """ - elapsed_time = end_time - start_time - self.step_timings[step_name] = { - 'start_time': datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S'), - 'end_time': datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S'), - 'elapsed_seconds': elapsed_time, - 'elapsed_formatted': self._format_time(elapsed_time), - 'status': status, - 'error': error - } - print(f"步骤耗时: {self._format_time(elapsed_time)}") - - def _format_time(self, seconds: float) -> str: - """ - 格式化时间显示 - - Args: - seconds: 秒数 - - Returns: - 格式化后的时间字符串 - """ - if seconds < 60: - return f"{seconds:.2f}秒" - elif seconds < 3600: - minutes = int(seconds // 60) - secs = seconds % 60 - return f"{minutes}分{secs:.2f}秒" - else: - hours = int(seconds // 3600) - minutes = int((seconds % 3600) // 60) - secs = seconds % 60 - return f"{hours}小时{minutes}分{secs:.2f}秒" - - def _ensure_water_mask_dat(self, img_path: str) -> str: - """确保存在 dat 格式水体掩膜,转发至工具模块""" - return _ensure_water_mask_dat( - img_path=img_path, - existing_dat_path=self.water_mask_path, - output_dir=str(self.water_mask_dir) - ) - - def step1_generate_water_mask(self, - mask_path: Optional[str] = None, - img_path: Optional[str] = None, - ndwi_threshold: float = 0.4, - use_ndwi: bool = False, - skip_dependency_check: bool = False, - generate_png: bool = True, - output_path: Optional[str] = None, **kwargs) -> str: - """步骤1: 生成或设置水域mask(Facade)""" - step_start_time = time.time() - try: - result = WaterMaskStep.run( - mask_path=mask_path, - img_path=img_path, - ndwi_threshold=ndwi_threshold, - use_ndwi=use_ndwi, - generate_png=generate_png, - output_path=output_path, - water_mask_dir=str(self.water_mask_dir), - callback=self._notify, - ) - self.water_mask_path = result - step_end_time = time.time() - self._record_step_time("步骤1: 生成水域mask", step_start_time, step_end_time) - return result - except Exception as e: - step_end_time = time.time() - self._record_step_time("步骤1: 生成水域mask", step_start_time, step_end_time, - status="failed", error=str(e)) - raise - - def _generate_image_preview(self, img_path: str, bands: Optional[List[int]] = None) -> str: - """生成影像预览图,转发至工具模块""" - output_path = str(self.water_mask_dir / f"hsi_preview.png") - return _generate_image_preview( - img_path=img_path, - output_path=output_path, - bands=bands, - title="影像预览: RGB波段(基于波长)" - ) - - def _generate_water_mask_overlay(self, img_path: str, mask_path: str) -> str: - """生成水域掩膜叠加图,转发至工具模块""" - output_path = str(self.water_mask_dir / "water_mask_overlay.png") - return _generate_water_mask_overlay( - img_path=img_path, - mask_path=mask_path, - output_path=output_path - ) - def _select_rgb_bands_by_wavelength(self, img_path: str, band_count: int) -> List[int]: - """ - 根据波长选择RGB波段 - - 目标波长: - - 蓝波段 (Blue): 460nm - - 绿波段 (Green): 550nm - - 红波段 (Red): 650nm - - Args: - img_path: 影像文件路径 - band_count: 总波段数 - - Returns: - [R_band_idx, G_band_idx, B_band_idx] 波段索引列表 - """ - try: - # 使用util模块的find_band_number函数根据波长查找波段 - if not FIND_BAND_AVAILABLE: - print("警告: find_band_number函数不可用,使用默认波段选择") - return self._select_rgb_bands_by_index(band_count) - - # 定义RGB目标波长 (nm) - target_wavelengths = { - 'R': 650.0, # 红波段 - 'G': 550.0, # 绿波段 - 'B': 460.0 # 蓝波段 - } - - bands = [] - for color, target_wl in target_wavelengths.items(): - try: - band_idx = find_band_number(target_wl, img_path) - # 确保波段索引在有效范围内 - band_idx = max(0, min(band_idx, band_count - 1)) - bands.append(band_idx) - print(f" {color}波段: 目标波长 {target_wl}nm -> 波段 {band_idx} (0-based)") - except Exception as e: - print(f" 无法为{color}波段找到波长 {target_wl}nm: {e}") - # 回退到基于索引的选择 - if color == 'R': - bands.append(min(band_count - 1, int(band_count * 0.25))) - elif color == 'G': - bands.append(min(band_count - 1, int(band_count * 0.15))) - else: # B - bands.append(min(band_count - 1, int(band_count * 0.05))) - - # 如果获取的波段不完整,使用默认值 - if len(bands) != 3: - print("警告: 波段选择不完整,使用默认波段") - return self._select_rgb_bands_by_index(band_count) - - return bands - - except Exception as e: - print(f"根据波长选择RGB波段时出错: {e},使用默认波段选择") - return self._select_rgb_bands_by_index(band_count) - - def _select_rgb_bands_by_index(self, band_count: int) -> List[int]: - """ - 基于波段序号的RGB波段选择(回退方法) - """ - if band_count >= 3: - r_band = min(band_count - 1, max(2, int(band_count * 0.25))) # ~25%位置作为红波段 - g_band = min(band_count - 1, max(1, int(band_count * 0.15))) # ~15%位置作为绿波段 - b_band = min(band_count - 1, max(0, int(band_count * 0.05))) # ~5%位置作为蓝波段 - return [r_band, g_band, b_band] - else: - return [0, 0, 0] - - def step2_find_glint_area(self, img_path: str, - glint_wave: float = 750.0, - method: str = 'otsu', - z_threshold: float = 2.5, - percentile: float = 95.0, - iqr_multiplier: float = 1.5, - window_size: int = 15, - multi_band_waves: Optional[List[float]] = None, - sub_method: str = 'zscore', - weights: Optional[List[float]] = None, - max_area: Optional[int] = None, - buffer_size: Optional[int] = None, - water_mask_path: Optional[str] = None, - skip_dependency_check: bool = False, **kwargs) -> str: - """步骤2: 找到耀斑区域(Facade)""" - step_start_time = time.time() - try: - result = GlintDetectionStep.run( - img_path=img_path, - glint_wave=glint_wave, - method=method, - z_threshold=z_threshold, - percentile=percentile, - iqr_multiplier=iqr_multiplier, - window_size=window_size, - multi_band_waves=multi_band_waves, - sub_method=sub_method, - weights=weights, - max_area=max_area, - buffer_size=buffer_size, - water_mask_path=water_mask_path, - glint_dir=str(self.glint_dir), - callback=self._notify, - ) - self.glint_mask_path = result - step_end_time = time.time() - self._record_step_time("步骤2: 找到耀斑区域", step_start_time, step_end_time) - return result - except Exception as e: - step_end_time = time.time() - self._record_step_time("步骤2: 找到耀斑区域", step_start_time, step_end_time, - status="failed", error=str(e)) - raise - - def _get_image_geo_info(self, img_path: str) -> tuple: - """获取影像地理信息,转发至工具模块""" - return _get_image_geo_info(img_path) - - def _load_image_as_array(self, img_path: str) -> tuple: - """加载影像为numpy数组,转发至工具模块""" - return _load_image_as_array(img_path) - - def _save_array_as_image(self, image_array: np.ndarray, output_path: str, - geotransform: tuple, projection: str, - dtype=None) -> str: - """保存numpy数组为影像,转发至工具模块""" - return _save_array_as_image(image_array, output_path, geotransform, projection, dtype) - - def _save_bands_as_image(self, corrected_bands: list, output_path: str, - geotransform: tuple, projection: str, - dtype=None) -> str: - """从波段列表保存影像,转发至工具模块""" - return _save_bands_as_image(corrected_bands, output_path, geotransform, projection, dtype) - - def _prepare_water_mask_for_algorithm(self, water_mask: Optional[Union[str, np.ndarray]], - image_shape: Union[tuple, np.ndarray], - geotransform: tuple, - projection: str, - img_path: str) -> Optional[np.ndarray]: - """准备水体掩膜供算法使用,转发至工具模块""" - return _prepare_water_mask_for_algorithm( - water_mask=water_mask, - image_shape=image_shape, - geotransform=geotransform, - projection=projection, - img_path=img_path, - water_mask_dir=str(self.water_mask_dir), - callback=getattr(self, 'callback', None) - ) - def _copy_hdr_info(self, source_img_path: str, dest_img_path: str): - """复制hdr文件信息,转发至工具模块""" - return _copy_hdr_info(source_img_path, dest_img_path) - - def _interpolate_zero_pixels(self, img_path: str, - interpolation_method: str = 'nearest', - output_path: Optional[str] = None, - water_mask: Optional[Union[str, np.ndarray]] = None) -> str: - """ - 对影像中所有波段都为0的像素点进行插值(转发至算法模块) - - Args: - img_path: 输入影像文件路径 - interpolation_method: 插值方法,支持: - - 'nearest': 邻近插值(最快) - - 'bilinear': 双线性插值 - - 'spline': 样条插值(RBF) - - 'kriging': 克里金插值(最慢但最准确) - output_path: 输出文件路径(如果为None,自动生成) - water_mask: 水域掩膜,用于限制插值区域(可选) - - Returns: - 插值后的影像文件路径 - """ - if not SCIPY_AVAILABLE: - raise ImportError("scipy未安装,无法进行0值像素插值") - if not GDAL_AVAILABLE: - raise ImportError("GDAL未安装,无法读取影像文件") - - print(f"\n开始对0值像素进行插值,方法: {interpolation_method}") - print("注意: 只处理所有波段都为0的像素点") - - # 转发至算法模块 - result_path, _ = _interpolate_zero_pixels_batch( - img_path=img_path, - interpolation_method=interpolation_method, - output_path=output_path, - water_mask=water_mask, - deglint_dir=str(self.deglint_dir), - callback_progress=lambda msg: print(f" {msg}") - ) - - self.interpolated_img_path = result_path - return result_path - - def step3_remove_glint(self, img_path: str, - method: str = "subtract_nir", - start_wave: Optional[float] = None, - end_wave: Optional[float] = None, - json_path: Optional[str] = None, - left_shoulder_wave: Optional[float] = None, - valley_wave: Optional[float] = None, - right_shoulder_wave: Optional[float] = None, - water_mask_path: Optional[Union[str, np.ndarray]] = None, - interpolate_zeros: bool = False, - interpolation_method: str = 'nearest', - enabled: bool = True, - kutser_shp_path: Optional[str] = None, - oxy_band: int = 38, - lower_oxy: int = 36, - upper_oxy: int = 49, - nir_band: int = 47, - nir_lower: int = 25, - nir_upper: int = 37, - goodman_A: float = 0.000019, - goodman_B: float = 0.1, - hedley_shp_path: Optional[str] = None, - hedley_nir_band: int = 47, - sugar_bounds: Optional[List[tuple]] = None, - sugar_sigma: float = 1.0, - sugar_estimate_background: bool = True, - sugar_glint_mask_method: str = "cdf", - sugar_iter: Optional[int] = 3, - sugar_termination_thresh: float = 20.0, - output_path: Optional[str] = None, - skip_dependency_check: bool = False, **kwargs) -> str: - """步骤3: 去除耀斑(Facade)""" - step_start_time = time.time() - try: - result = GlintRemovalStep.run( - img_path=img_path, - method=method, - start_wave=start_wave, - end_wave=end_wave, - json_path=json_path, - left_shoulder_wave=left_shoulder_wave, - valley_wave=valley_wave, - right_shoulder_wave=right_shoulder_wave, - water_mask=water_mask_path, - interpolate_zeros=interpolate_zeros, - interpolation_method=interpolation_method, - enabled=enabled, - kutser_shp_path=kutser_shp_path, - oxy_band=oxy_band, - lower_oxy=lower_oxy, - upper_oxy=upper_oxy, - nir_band=nir_band, - nir_lower=nir_lower, - nir_upper=nir_upper, - goodman_A=goodman_A, - goodman_B=goodman_B, - hedley_shp_path=hedley_shp_path, - hedley_nir_band=hedley_nir_band, - sugar_bounds=sugar_bounds, - sugar_sigma=sugar_sigma, - sugar_estimate_background=sugar_estimate_background, - sugar_glint_mask_method=sugar_glint_mask_method, - sugar_iter=sugar_iter, - sugar_termination_thresh=sugar_termination_thresh, - _get_image_geo_info=self._get_image_geo_info, - _load_image_as_array=self._load_image_as_array, - _save_bands_as_image=self._save_bands_as_image, - _copy_hdr_info=self._copy_hdr_info, - _prepare_water_mask_for_algorithm=self._prepare_water_mask_for_algorithm, - _interpolate_zero_pixels_batch=_interpolate_zero_pixels_batch, - deglint_dir=str(self.deglint_dir), - water_mask_dir=str(self.water_mask_dir), - callback=self._notify, - output_path=output_path, - ) - self.deglint_img_path = result - step_end_time = time.time() - self._record_step_time("步骤3: 去除耀斑", step_start_time, step_end_time) - return result - except Exception as e: - step_end_time = time.time() - self._record_step_time("步骤3: 去除耀斑", step_start_time, step_end_time, - status="failed", error=str(e)) - raise - - def step5_process_csv(self, csv_path: str, skip_dependency_check: bool = False, **kwargs) -> str: - """ - 步骤4: 对csv文件进行处理,筛选剔除异常值 - - Args: - csv_path: 输入CSV文件路径 - skip_dependency_check: 是否跳过依赖检查(为保持一致性而保留) - - Returns: - 处理后的CSV文件路径 - """ - self._notify("started", "步骤4: 处理CSV文件") - result = DataPreparationStep.process_csv( - csv_path=csv_path, - output_dir=str(self.processed_data_dir), - ) - self.processed_csv_path = result - self._record_step_time("步骤4: 处理CSV文件", 0, 0) - self._notify("completed", f"处理后的CSV文件已保存: {result}") - return result - - def step6_extract_spectra(self, deglint_img_path: Optional[str] = None, - radius: int = 5, - source_epsg: int = 4326, - csv_path: Optional[str] = None, - boundary_path: Optional[str] = None, - glint_mask_path: Optional[str] = None, - skip_dependency_check: bool = False, **kwargs) -> str: - """ - 步骤5: 根据csv文件的采样点坐标,在去除耀斑的文件中统计采样点的平均光谱 - - Args: - deglint_img_path: 去除耀斑后的影像文件路径(如果为None,使用步骤3的结果) - radius: 采样半径(像素) - source_epsg: 源坐标系EPSG代码 - csv_path: CSV文件路径(如果为None,使用步骤4的结果) - boundary_path: 水体掩膜文件路径(如果为None,自动生成水体掩膜) - glint_mask_path: 耀斑掩膜栅格路径(.dat/.tif);若提供则优先使用,否则使用步骤2生成的路径 - - Returns: - 包含光谱数据的CSV文件路径 - """ - # 参数解析(保留原逻辑,处理None值) - if deglint_img_path is not None: - img_path = deglint_img_path - elif self.deglint_img_path is not None: - img_path = self.deglint_img_path - else: - img_path = None - if csv_path is not None: - final_csv_path = csv_path - elif self.processed_csv_path is not None: - final_csv_path = self.processed_csv_path - else: - final_csv_path = None - - self._notify("started", "步骤5: 提取训练样本点光谱") - result = DataPreparationStep.extract_training_spectra( - deglint_img_path=img_path, - radius=radius, - source_epsg=source_epsg, - csv_path=final_csv_path, - boundary_path=boundary_path, - glint_mask_path=glint_mask_path, - water_mask_path=self.water_mask_path, - output_dir=str(self.training_spectra_dir), - ) - self.training_csv_path = result - self._record_step_time("步骤5: 提取训练样本点光谱", 0, 0) - self._notify("completed", f"训练光谱数据已保存: {result}") - return result - - def step7_calc_indices(self, - training_csv_path: Optional[str] = None, - formula_csv_file: Optional[str] = None, - formula_names: Optional[List[str]] = None, - output_file: Optional[str] = None, - enabled: bool = True, - skip_dependency_check: bool = False, **kwargs) -> str: - """ - 步骤5.5: 根据训练光谱计算水质光谱指数 - - 使用band_math.py中的方法实现,支持从公式CSV文件中批量计算指定公式 - - Args: - training_csv_path: 训练光谱数据CSV路径(如果为None,使用步骤5的结果) - formula_csv_file: 公式CSV文件路径,包含公式名称和具体公式 - formula_names: 要计算的公式名称列表,如果为None则计算所有公式 - output_file: 输出文件完整路径(支持绝对路径),如果为None则使用默认路径 - - Returns: - 包含计算结果的新CSV文件路径 - """ - # 参数解析(保留原逻辑) - if training_csv_path is not None: - csv_path = training_csv_path - elif self.training_csv_path is not None: - csv_path = self.training_csv_path - else: - csv_path = None - - self._notify("started", "步骤5.5: 计算水质光谱指数") - result = DataPreparationStep.calculate_water_quality_indices( - training_csv_path=csv_path, - formula_csv_file=formula_csv_file, - formula_names=formula_names, - output_file=output_file, - enabled=enabled, - output_dir=str(self.indices_dir), - ) - self.indices_path = result - self._record_step_time("步骤5.5: 计算水质光谱指数", 0, 0) - if result: - self._notify("completed", f"水质指数已保存: {result}") - return result - - def step8_train_ml(self, feature_start_column: str = "374.285004", - preprocessing_methods: List[str] = None, - model_names: List[str] = None, - split_methods: List[str] = None, - cv_folds: int = 5, - training_csv_path: Optional[str] = None, - skip_dependency_check: bool = False, **kwargs) -> str: - """ - 步骤6: 使用采样点的平均光谱和对应的实测值建立机器学习模型,保存模型权重 - - Args: - feature_start_column: 特征开始列名或索引 - preprocessing_methods: 预处理方法列表 - model_names: 模型名称列表 - split_methods: 数据划分方法列表 - cv_folds: 交叉验证折数 - - Returns: - 模型保存目录路径 - """ - # 参数解析(保留原逻辑) - if training_csv_path is not None: - final_csv_path = training_csv_path - elif self.training_csv_path is not None: - final_csv_path = self.training_csv_path - else: - final_csv_path = None - - self._notify("started", "步骤6: 训练机器学习模型") - result = ModelingStep.train_models( - feature_start_column=feature_start_column, - preprocessing_methods=preprocessing_methods, - model_names=model_names, - split_methods=split_methods, - cv_folds=cv_folds, - training_csv_path=final_csv_path, - output_dir=str(self.models_dir), - _report_generator=self.report_generator, - ) - self._record_step_time("步骤6: 训练机器学习模型", 0, 0) - self._notify("completed", f"模型训练完成,结果保存在: {result}") - return result - - def step8_qaa_inversion(self, **config): - """步骤8: QAA 物理推导(非经验模型)""" - import numpy as np - import pandas as pd - from src.core.algorithms.qaa import QAABaselineSolver - from src.utils.water_owt_config import get_lambda_0 - - qaa_cfg = config.get('step8_qaa', {}) - lake_name = qaa_cfg.get('lake_name', 'Unknown') - lambda_0 = qaa_cfg.get('lambda_0', get_lambda_0(lake_name)) - output_dir = os.path.join(self.work_dir, "8_QAA_Inversion") - os.makedirs(output_dir, exist_ok=True) - output_path = qaa_cfg.get('output_path') or os.path.join(output_dir, "a_lambda_results.csv") - - spectrum_csv = qaa_cfg.get('spectrum_csv_path') - if not spectrum_csv: - spectrum_csv = config.get('training_csv_path') - if not spectrum_csv or not os.path.exists(spectrum_csv): - # 回退:扫描 work_dir 下 step5 的产物目录,找第一个 .csv - fallback_candidates = [] - step5_dir = os.path.join(self.work_dir, "6_Spectral_Feature_Extraction") - if os.path.isdir(step5_dir): - for f in sorted(os.listdir(step5_dir)): - if f.lower().endswith('.csv'): - fallback_candidates.append(os.path.join(step5_dir, f)) - if fallback_candidates: - spectrum_csv = fallback_candidates[0] - msg = f"[Step 8] spectrum_csv_path 为空,已自动回退到 step5 产物: {spectrum_csv}" - (self.logger.info if hasattr(self, 'logger') else print)(msg) - else: - msg = f"[Step 8] 训练光谱 CSV 不存在或路径为空: {spectrum_csv}" - (self.logger.info if hasattr(self, 'logger') else print)(msg) - return - - df = pd.read_csv(spectrum_csv, encoding="utf-8-sig") - col_names = df.columns.tolist() - - wavelength_col_idx = None - for i, col in enumerate(col_names): - try: - float(col) - wavelength_col_idx = i - break - except (ValueError, TypeError): - pass - - if wavelength_col_idx is None: - msg = "[Step 8] 无法从 CSV 列名中识别波长信息" - (self.logger.info if hasattr(self, 'logger') else print)(msg) - return - - # ── 源头透传:提取元数据列(左列),与数据行一一对应 ──────────── - meta_df = df.iloc[:, :wavelength_col_idx].copy() - - wavelengths = np.array([float(c) for c in col_names[wavelength_col_idx:]], dtype=np.float64) - data_matrix = df.iloc[:, wavelength_col_idx:].values.astype(np.float64) - if data_matrix.ndim == 1: - data_matrix = data_matrix[np.newaxis, :] - - solver = QAABaselineSolver() - raw_result = solver.run_inversion(wavelengths, data_matrix, lambda_0) - - # run_inversion 返回:单样本 → dict,多样本 → list[dict] - if isinstance(raw_result, list): - sample_results = raw_result - else: - sample_results = [raw_result] - - rows_out = [] - for i, sample_result in enumerate(sample_results): - wl_arr = wavelengths - a_arr = sample_result['a_lambda'] - bb_arr = sample_result['bb_lambda'] - # 取出第 i 个样本的元数据行(如 X, Y, Lon, Lat 等),与光谱行一一对齐 - meta_row = meta_df.iloc[i].to_dict() if i < len(meta_df) else {} - for j, wl in enumerate(wl_arr): - rows_out.append({ - 'sample_id': f"sample_{i}", - 'Wavelength': wl, - 'a_lambda': a_arr[j], - 'bb_lambda': bb_arr[j], - **meta_row, - }) - - result_df = pd.DataFrame(rows_out) - result_df.to_csv(output_path, index=False, float_format='%.8f') - - msg = f"Step 8: QAA 反演完毕,水域={lake_name},λ₀={lambda_0}nm,结果保存于: {output_path}" - (self.logger.info if hasattr(self, 'logger') else print)(msg) - - def step9_concentration_inversion(self, **config): - """步骤9: 浓度反演(基于 QAA Step 8 输出的 a_lambda/bb_lambda)""" - from src.core.algorithms.concentration_inversion import ConcentrationPipeline - - conc_cfg = config.get('step9_concentration', {}) - input_csv = conc_cfg.get('input_csv') - output_csv = conc_cfg.get('output_csv') - lake_case = conc_cfg.get('lake_case', 'medium') - - if not input_csv or not os.path.exists(input_csv): - msg = f"[Step 9] QAA 结果文件不存在或路径为空: {input_csv}" - (self.logger.info if hasattr(self, 'logger') else print)(msg) - return - - if not output_csv: - output_dir = os.path.join(self.work_dir, "9_Concentration") - os.makedirs(output_dir, exist_ok=True) - output_csv = os.path.join(output_dir, "final_concentrations.csv") - - pipeline = ConcentrationPipeline(lake_case=lake_case) - result_csv = pipeline.run_pipeline(input_csv, output_csv) - - msg = f"Step 9: 浓度反演完毕,结果保存于: {result_csv}" - (self.logger.info if hasattr(self, 'logger') else print)(msg) - - def step4_sampling(self, deglint_img_path: Optional[str] = None, - interval: int = 50, - sample_radius: int = 5, - chunk_size: int = 1000, - water_mask_path: Optional[str] = None, - glint_mask_path: Optional[str] = None, - use_adaptive_sampling: bool = True, - skip_dependency_check: bool = False, **kwargs) -> str: - """ - 步骤10: 生成根据水域掩膜内且耀斑掩膜外的采样点,统计采样点的平均光谱 - - Args: - deglint_img_path: 去除耀斑后的影像文件路径(如果为None,使用步骤3的结果) - interval: 采样点间隔(像元数) - sample_radius: 采样点半径(像元数) - chunk_size: 每次处理的行数(控制内存使用) - water_mask_path: dat格式的水域掩膜文件路径(如果为None,将使用步骤1生成的dat格式掩膜) - use_adaptive_sampling: 是否启用自适应采样(根据水体宽度动态调整间隔) - - Returns: - 采样点光谱数据CSV文件路径 - """ - # 参数解析(保留原逻辑) - if deglint_img_path is not None: - img_path = deglint_img_path - elif self.deglint_img_path is not None: - img_path = self.deglint_img_path - else: - img_path = None - if water_mask_path is None and self.water_mask_path is not None: - water_mask_path = self.water_mask_path - - self._notify("started", "步骤10: 生成预测采样点") - result = PredictionStep.generate_sampling_points( - deglint_img_path=img_path, - interval=interval, - sample_radius=sample_radius, - chunk_size=chunk_size, - water_mask_path=water_mask_path, - glint_mask_path=glint_mask_path, - output_dir=str(self.sampling_dir), - use_adaptive_sampling=use_adaptive_sampling, - ) - self._record_step_time("步骤10: 生成预测采样点", 0, 0) - self._notify("completed", f"采样点光谱数据已保存: {result}") - return result - - def step9_predict_ml(self, sampling_csv_path: str, - models_dir: Optional[str] = None, - metric: str = 'test_r2', - prediction_column: str = 'prediction', - skip_dependency_check: bool = False, **kwargs) -> Dict[str, str]: - """ - 步骤11: 将训练好的最佳机器学习模型应用到采样点的平均光谱上,预测水质参数 - - Args: - sampling_csv_path: 采样点光谱数据CSV路径 - models_dir: 模型保存目录(如果为None,使用步骤7的结果) - metric: 选择最佳模型的指标 - prediction_column: 预测结果列名 - - Returns: - 预测结果文件路径字典(键为目标列名) - """ - _external_models_dict = kwargs.get('_external_models_dict') - _external_model = kwargs.get('_external_model') - _external_model_path = kwargs.get('_external_model_path') - _external_model_dir = kwargs.get('_external_model_dir') - print(f"[Pipeline] 收到字典: {'Yes' if _external_models_dict else 'No'}" - f", 收到单模型: {'Yes' if _external_model else 'No'}") - - self._notify("started", "步骤11: 预测水质参数") - result = PredictionStep.predict_water_quality( - sampling_csv_path=sampling_csv_path, - models_dir=models_dir if models_dir else str(self.models_dir), - metric=metric, - prediction_column=prediction_column, - output_dir=str(self.prediction_dir / "9_ML_Prediction"), - _report_generator=self.report_generator, - _external_model=_external_model, - _external_model_path=_external_model_path, - _external_models_dict=_external_models_dict, - _external_model_dir=_external_model_dir, - ) - self._record_step_time("步骤11: 预测水质参数", 0, 0) - self._notify("completed", f"预测完成,结果保存在: {self.prediction_dir}") - return result - - def step10_map(self, prediction_csv_path: str, - boundary_shp_path: str, - output_image_path: Optional[str] = None, - resolution: float = 30, - input_crs: str = 'EPSG:32651', - output_crs: str = 'EPSG:4326', - show_sample_points: bool = False, - base_map_tif: Optional[str] = None, - use_distance_diffusion: bool = True, - max_diffusion_distance: Optional[float] = None, - diffusion_power: float = 2, - diffusion_n_neighbors: int = 15, - cmap: Optional[str] = None, - expand_ratio: float = 0.05, - skip_dependency_check: bool = False, **kwargs) -> str: - """ - 步骤9: 根据采样点的坐标和反演的实测参数,以及水域掩膜,通过插值的方法,得到水质参数的可视化分布图 - - Args: - prediction_csv_path: 预测结果CSV文件路径(前两列为经纬度,第三列为预测值) - boundary_shp_path: 边界shapefile文件路径 - output_image_path: 输出图片路径(已废弃:本函数强制写入 self.visualization_dir,参数仅保留签名兼容) - resolution: 插值网格分辨率(米) - input_crs: 输入坐标系 - output_crs: 输出坐标系 - show_sample_points: 是否在图上显示采样点 - base_map_tif: 底图TIF路径,用于在水域掩膜外显示底图 - use_distance_diffusion: 是否启用距离扩散补全边界 - max_diffusion_distance: 距离扩散的最大距离(米),None表示自动计算 - diffusion_power: 距离扩散的幂参数 - diffusion_n_neighbors: 距离扩散时使用的最近邻数量 - cmap: 指定的颜色映射名称,None表示自动识别 - expand_ratio: 边界外扩比例(0-1之间) - - Returns: - 可视化分布图文件路径(始终位于 self.visualization_dir 下) - """ - # 修复:所有分布图(PNG)与底层 Kriging 输出的派生文件必须落到 14_visualization。 - # 不论调用方(panel / 主流程 run_full_pipeline / 批量线程)传入什么路径, - # 都在此强制 override 为 self.visualization_dir,规避 - # (a) 调用方误传 prediction_dir (11_12_13_predictions) 之类错位路径 - # (b) 老代码里硬编码字符串残留 - # 若调用方传入的路径仍在 self.visualization_dir 内(子目录/不同文件名)则尊重其意图。 - csv_name = Path(prediction_csv_path).stem - forced_image_path = str(self.visualization_dir / f"{csv_name}_distribution.png") - viz_dir_resolved = str(self.visualization_dir) - if output_image_path and output_image_path != forced_image_path: - # 判断调用方路径是否落在 visualization_dir 内(用 str.startswith 轻量检查) - norm_user = output_image_path.replace('\\', '/').rstrip('/') - norm_viz = viz_dir_resolved.replace('\\', '/').rstrip('/') - if not norm_user.startswith(norm_viz + '/') and norm_user != norm_viz: - print( - f"⚠️ [step10_map] 调用方传入 output_image_path={output_image_path!r} " - f"不在 {viz_dir_resolved} 下,强制重定向到 {forced_image_path}" - ) - output_image_path = forced_image_path - else: - # 调用方路径已在 visualization_dir 内(如子目录),保留意图 - output_image_path = output_image_path - else: - output_image_path = forced_image_path - - self._notify("started", "步骤9: 生成水质参数可视化分布图") - result = MappingStep.generate_distribution_map( - prediction_csv_path=prediction_csv_path, - boundary_shp_path=boundary_shp_path, - output_image_path=output_image_path, - resolution=resolution, - input_crs=input_crs, - output_crs=output_crs, - show_sample_points=show_sample_points, - base_map_tif=base_map_tif, - use_distance_diffusion=use_distance_diffusion, - max_diffusion_distance=max_diffusion_distance, - diffusion_power=diffusion_power, - diffusion_n_neighbors=diffusion_n_neighbors, - cmap=cmap, - expand_ratio=expand_ratio, - output_dir=str(self.visualization_dir), - ) - self._record_step_time("步骤9: 生成分布图", 0, 0) - self._notify("completed", f"可视化分布图已保存: {result}") - return result - - def generate_model_scatter_plots(self, training_csv_path: Optional[str] = None, - models_dir: Optional[str] = None, - metric: str = 'test_r2', - use_enhanced: bool = True, - feature_start_column: Union[str, int] = 13, - test_size: float = 0.2, - random_state: int = 42) -> Dict[str, str]: - """ - 生成模型评估散点图(真实值vs预测值) - - Args: - training_csv_path: 训练数据CSV路径(如果为None,使用步骤5的结果) - models_dir: 模型保存目录(如果为None,使用步骤6的结果) - metric: 选择最佳模型的指标 - use_enhanced: 是否使用增强版散点图(带置信区间,使用sctter_batch) - feature_start_column: 特征开始列名或索引 - test_size: 测试集比例 - random_state: 随机种子 - - Returns: - 散点图文件路径字典(键为目标参数名) - """ - print("\n" + "="*80) - print("生成模型评估散点图") - print("="*80) - - if training_csv_path is None: - training_csv_path = self.training_csv_path - if training_csv_path is None: - raise ValueError("请提供训练数据CSV路径,或先执行步骤5") - - if models_dir is None: - models_dir = str(self.models_dir) - - scatter_paths = {} - models_path = Path(models_dir) - - # 如果使用增强版散点图(带置信区间) - if use_enhanced: - print("使用增强版散点图(带置信区间)") - try: - # 使用sctter_batch批量生成散点图 - results = self.scatter_batch.batch_plot_scatter( - models_root_dir=models_dir, - csv_path=training_csv_path, - output_dir=str(self.visualization_dir / "scatter_plots"), - metric=metric, - target_column=None, # 使用文件夹名称作为目标列名 - feature_start_column=feature_start_column, - test_size=test_size, - random_state=random_state - ) - - # 提取成功生成的散点图路径 - for target_name, result in results.items(): - if result.get('status') == 'success': - scatter_paths[target_name] = result.get('save_path', '') - print(f" ✓ {target_name}: {result.get('save_path', '')}") - else: - print(f" ✗ {target_name}: 失败 - {result.get('error', '未知错误')}") - - except Exception as e: - print(f"使用增强版散点图时出错: {e}") - print("回退到基础版散点图") - use_enhanced = False - - # 如果未使用增强版或增强版失败,使用基础版 - if not use_enhanced or not scatter_paths: - print("使用基础版散点图") - from src.core.prediction.inference_batch import WaterQualityInference - - # 遍历所有目标参数文件夹 - for target_folder in models_path.iterdir(): - if not target_folder.is_dir(): - continue - - target_name = target_folder.name - print(f"\n处理目标参数: {target_name}") - - try: - # 加载最佳模型进行评估 - inferencer = WaterQualityInference(str(target_folder)) - eval_result = inferencer.evaluate_with_split( - data_csv_path=training_csv_path, - split_method="spxy", - test_size=test_size, - random_state=random_state, - metric=metric - ) - - # 提取预测结果 - predictions = eval_result.get('predictions', {}) - if predictions: - y_train_true = predictions.get('y_train_true') - y_train_pred = predictions.get('y_train_pred') - y_test_true = predictions.get('y_test_true') - y_test_pred = predictions.get('y_test_pred') - metrics = eval_result.get('test_metrics', {}) - - if y_train_true is not None and y_test_true is not None: - # 合并训练集和测试集 - y_all_true = np.concatenate([y_train_true, y_test_true]) - y_all_pred = np.concatenate([y_train_pred, y_test_pred]) - - # 生成索引 - train_indices = np.arange(len(y_train_true)) - test_indices = np.arange(len(y_train_true), len(y_all_true)) - - # 绘制散点图 - scatter_path = self.visualizer.plot_scatter_true_vs_pred( - y_true=y_all_true, - y_pred=y_all_pred, - target_name=target_name, - train_indices=train_indices, - test_indices=test_indices, - metrics={ - 'train_r2': eval_result.get('train_metrics', {}).get('r2', 0), - 'test_r2': metrics.get('r2', 0), - 'train_rmse': eval_result.get('train_metrics', {}).get('rmse', 0), - 'test_rmse': metrics.get('rmse', 0) - } - ) - scatter_paths[target_name] = scatter_path - except Exception as e: - print(f"处理目标参数 {target_name} 时出错: {e}") - continue - - print(f"\n散点图生成完成,共生成 {len(scatter_paths)} 个图表") - return scatter_paths - - def generate_spectrum_comparison_plots(self, csv_path: Optional[str] = None, - parameter_columns: Optional[List[str]] = None, - wavelength_start_column: Union[str, int] = "UTM_Y") -> Dict[str, str]: - """ - 生成光谱曲线对比图(不同参数值的光谱曲线对比) - - Args: - csv_path: 包含光谱和参数值的CSV文件路径(如果为None,使用步骤5的结果) - parameter_columns: 参数列名列表(如果为None,自动检测) - wavelength_start_column: 波长开始列名或索引 - - Returns: - 光谱曲线图文件路径字典(键为参数名) - """ - print("\n" + "="*80) - print("生成光谱曲线对比图") - print("="*80) - - if csv_path is None: - csv_path = self.training_csv_path - if csv_path is None: - raise ValueError("请提供CSV文件路径,或先执行步骤5") - - # 读取数据以检测参数列 - df = pd.read_csv(csv_path) - - if parameter_columns is None: - # 自动检测参数列(排除坐标列和光谱列) - if isinstance(wavelength_start_column, str): - try: - wavelength_start_idx = df.columns.get_loc(wavelength_start_column) - except: - wavelength_start_idx = 13 # 默认值 - else: - wavelength_start_idx = wavelength_start_column - - # 假设前几列是参数列(根据实际数据结构调整) - parameter_columns = list(df.columns[:wavelength_start_idx]) - # 排除坐标列(通常是前两列) - if len(parameter_columns) > 2: - parameter_columns = parameter_columns[2:] - - spectrum_paths = {} - for param_col in parameter_columns: - if param_col not in df.columns: - continue - - print(f"\n处理参数: {param_col}") - try: - spectrum_path = self.visualizer.plot_spectrum_by_parameter( - csv_path=csv_path, - parameter_column=param_col, - wavelength_start_column=wavelength_start_column, - n_groups=5 - ) - spectrum_paths[param_col] = spectrum_path - except Exception as e: - print(f"处理参数 {param_col} 时出错: {e}") - continue - - print(f"\n光谱曲线图生成完成,共生成 {len(spectrum_paths)} 个图表") - return spectrum_paths - - def generate_boxplots(self, csv_path: Optional[str] = None, - parameter_columns: Optional[List[str]] = None, - data_start_column: int = 4, - save_individual: bool = True, - use_seaborn: bool = True) -> Dict[str, str]: - """ - 生成水质参数的箱型图 - - Args: - csv_path: CSV文件路径(如果为None,使用步骤4的结果) - parameter_columns: 参数列名列表(如果为None,自动检测) - data_start_column: 数据开始列索引(从第几列开始,默认第5列,索引为4) - save_individual: 是否为每个参数单独保存箱型图 - use_seaborn: 是否使用seaborn绘制(更美观) - - Returns: - 箱型图文件路径字典 - """ - print("\n" + "="*80) - print("生成水质参数箱型图") - print("="*80) - - if csv_path is None: - csv_path = self.processed_csv_path - if csv_path is None: - raise ValueError("请提供CSV文件路径,或先执行步骤4") - - # 读取数据 - df = pd.read_csv(csv_path) - - # 确定参数列 - if parameter_columns is None: - # 从指定列开始的所有列 - data_columns = df.iloc[:, data_start_column:] - parameter_columns = list(data_columns.columns) - else: - # 使用指定的列 - parameter_columns = [col for col in parameter_columns if col in df.columns] - - if not parameter_columns: - print("警告: 未找到有效的参数列") - return {} - - # 创建输出目录 - boxplot_dir = self.visualization_dir / "boxplots" - boxplot_dir.mkdir(parents=True, exist_ok=True) - - boxplot_paths = {} - - if save_individual: - # 为每个参数单独绘制箱型图 - print(f"为每个参数单独绘制箱型图(共 {len(parameter_columns)} 个参数)") - - for column in parameter_columns: - if column not in df.columns: - continue - - # 移除空值 - clean_data = df[column].dropna() - - if len(clean_data) == 0: - print(f"跳过列 '{column}': 没有有效数据") - continue - - try: - # 创建新图形 - plt.figure(figsize=(8, 6)) - - if use_seaborn: - # 使用seaborn绘制 - plot_data = pd.DataFrame({ - '参数': [column] * len(clean_data), - '数值': clean_data - }) - sns.boxplot(data=plot_data, x='参数', y='数值', palette='Set2') - sns.stripplot(data=plot_data, x='参数', y='数值', - color='red', alpha=0.6, size=5, jitter=True) - else: - # 使用matplotlib绘制 - box_plot = plt.boxplot([clean_data], labels=[column], - patch_artist=True, showfliers=False) - box_plot['boxes'][0].set_facecolor('lightblue') - box_plot['boxes'][0].set_alpha(0.7) - - # 添加散点 - x_pos = np.random.normal(1, 0.04, size=len(clean_data)) - plt.scatter(x_pos, clean_data, alpha=0.6, s=30, color='red', - edgecolors='black', linewidth=0.5, zorder=3) - - # 设置标题和标签 - plt.title(f'{column} - 箱型图', fontsize=14, fontweight='bold') - plt.xlabel('参数', fontsize=12) - plt.ylabel('数值', fontsize=12) - - # 添加统计信息 - stats_text = (f'数据点数: {len(clean_data)}\n' - f'均值: {clean_data.mean():.2f}\n' - f'中位数: {clean_data.median():.2f}\n' - f'标准差: {clean_data.std():.2f}') - plt.text(0.02, 0.98, stats_text, transform=plt.gca().transAxes, - verticalalignment='top', - bbox=dict(boxstyle='round', - facecolor='wheat' if not use_seaborn else 'lightgreen', - alpha=0.8)) - - # 添加网格 - plt.grid(True, alpha=0.3, linestyle='--') - - # 调整布局 - plt.tight_layout() - - # 保存图片 - safe_column_name = column.replace('/', '_').replace('\\', '_').replace(':', '_') - save_path = boxplot_dir / f'{safe_column_name}_boxplot.png' - plt.savefig(save_path, dpi=300, bbox_inches='tight') - plt.close() - - boxplot_paths[column] = str(save_path) - print(f" 已保存: {save_path.name}") - - except Exception as e: - print(f" 处理参数 {column} 时出错: {e}") - continue - - # 生成所有参数的综合箱型图 - try: - print("\n生成综合箱型图(所有参数在一张图上)") - plt.figure(figsize=(max(12, len(parameter_columns) * 0.8), 8)) - - # 准备数据 - box_data = [] - labels = [] - for column in parameter_columns: - if column in df.columns: - clean_data = df[column].dropna() - if len(clean_data) > 0: - box_data.append(clean_data) - labels.append(column) - - if box_data: - if use_seaborn: - # 使用seaborn绘制 - melted_data = pd.melt(df[labels], var_name='参数', value_name='数值') - melted_data = melted_data.dropna() - sns.boxplot(data=melted_data, x='参数', y='数值', palette='Set3') - sns.stripplot(data=melted_data, x='参数', y='数值', - color='red', alpha=0.6, size=4, jitter=True) - else: - # 使用matplotlib绘制 - box_plot = plt.boxplot(box_data, labels=labels, patch_artist=True, - showfliers=False) - colors = plt.cm.Set3(np.linspace(0, 1, len(box_data))) - for patch, color in zip(box_plot['boxes'], colors): - patch.set_facecolor(color) - patch.set_alpha(0.7) - - # 添加散点 - for i, data in enumerate(box_data): - x_pos = np.random.normal(i + 1, 0.04, size=len(data)) - plt.scatter(x_pos, data, alpha=0.6, s=20, color='red', - edgecolors='black', linewidth=0.5, zorder=3) - - plt.title('水质参数箱型图(综合)', fontsize=16, fontweight='bold') - plt.xlabel('参数', fontsize=12) - plt.ylabel('数值', fontsize=12) - plt.xticks(rotation=45, ha='right') - plt.grid(True, alpha=0.3, linestyle='--') - plt.tight_layout() - - combined_path = boxplot_dir / 'all_parameters_boxplot.png' - plt.savefig(combined_path, dpi=300, bbox_inches='tight') - plt.close() - - boxplot_paths['all_parameters'] = str(combined_path) - print(f" 已保存综合箱型图: {combined_path.name}") - - except Exception as e: - print(f"生成综合箱型图时出错: {e}") - - print(f"\n箱型图生成完成,共生成 {len(boxplot_paths)} 个图表") - return boxplot_paths - - def generate_statistical_charts(self, csv_path: Optional[str] = None, - parameter_columns: Optional[List[str]] = None) -> Dict[str, str]: - """ - 生成统计图表(箱线图、直方图、相关性热力图) - - Args: - csv_path: CSV文件路径(如果为None,使用步骤4的结果) - parameter_columns: 参数列名列表(如果为None,自动检测) - - Returns: - 统计图表文件路径字典 - """ - print("\n" + "="*80) - print("生成统计图表") - print("="*80) - - if csv_path is None: - csv_path = self.processed_csv_path - if csv_path is None: - raise ValueError("请提供CSV文件路径,或先执行步骤4") - - # 读取数据以检测参数列 - df = pd.read_csv(csv_path) - - if parameter_columns is None: - # 自动检测参数列(排除前两列坐标列) - parameter_columns = list(df.columns[2:]) - # 过滤掉非数值列 - parameter_columns = [col for col in parameter_columns - if df[col].dtype in [np.float64, np.int64]] - - chart_paths = self.visualizer.plot_statistical_charts( - csv_path=csv_path, - parameter_columns=parameter_columns - ) - - print(f"\n统计图表生成完成") - return chart_paths - - def generate_glint_deglint_previews(self, work_dir: Optional[str] = None, - output_subdir: str = "glint_deglint_previews", - generate_glint: bool = True, - generate_deglint: bool = True) -> Dict[str, str]: - """ - 生成2_Glint_Detection和3_deglint文件夹中影像文件的PNG预览图 - - Args: - work_dir: 工作目录(如果为None,则使用self.work_dir) - output_subdir: 输出子目录名称 - generate_glint: 是否处理2_Glint_Detection文件夹 - generate_deglint: 是否处理3_deglint文件夹 - - Returns: - 生成的预览图路径字典 - """ - if work_dir is None: - work_dir = str(self.work_dir) - - print(f"\n{'='*70}") - print("步骤: 生成耀斑分析影像预览图") - print(f"{'='*70}") - - try: - preview_paths = self.visualizer.generate_glint_deglint_previews( - work_dir=work_dir, - output_subdir=output_subdir, - generate_glint=generate_glint, - generate_deglint=generate_deglint - ) - - print(f"耀斑分析影像预览图生成完成,共生成 {len(preview_paths)} 个预览图") - return preview_paths - - except Exception as e: - print(f"生成耀斑分析影像预览图时出错: {e}") - return {} - - def generate_pipeline_report(self, output_path: Optional[str] = None) -> str: - """ - 生成流程执行报告,包含每步的耗时统计 - - Args: - output_path: 输出文件路径(如果为None,自动生成) - - Returns: - 报告文件路径 - """ - if output_path is None: - timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') - output_path = str(self.reports_dir / f"pipeline_report_{timestamp}.csv") - - # 准备报告数据 - report_data = [] - total_time = 0.0 - - # 按步骤顺序排列 - step_order = [ - "步骤1: 生成水域mask", - "步骤2: 找到耀斑区域", - "步骤3: 去除耀斑", - "步骤4: 处理CSV文件", - "步骤5: 提取训练样本点光谱", - "步骤5.5: 计算水质光谱指数", - "步骤6: 训练机器学习模型", - "步骤6.5: 非经验模型训练", - "步骤6.75: 自定义回归", - "步骤7: 生成预测采样点", - "步骤8: 预测水质参数", - "步骤9: 生成分布图" - ] - - for step_name in step_order: - if step_name in self.step_timings: - timing_info = self.step_timings[step_name] - report_data.append({ - '步骤': step_name, - '开始时间': timing_info['start_time'], - '结束时间': timing_info['end_time'], - '耗时(秒)': f"{timing_info['elapsed_seconds']:.2f}", - '耗时(格式化)': timing_info['elapsed_formatted'], - '状态': timing_info['status'], - '错误信息': timing_info.get('error', '') - }) - if timing_info['status'] == 'completed': - total_time += timing_info['elapsed_seconds'] - - # 添加总计行 - if self.pipeline_start_time and self.pipeline_end_time: - pipeline_total = self.pipeline_end_time - self.pipeline_start_time - report_data.append({ - '步骤': '总计', - '开始时间': datetime.fromtimestamp(self.pipeline_start_time).strftime('%Y-%m-%d %H:%M:%S'), - '结束时间': datetime.fromtimestamp(self.pipeline_end_time).strftime('%Y-%m-%d %H:%M:%S'), - '耗时(秒)': f"{pipeline_total:.2f}", - '耗时(格式化)': self._format_time(pipeline_total), - '状态': 'completed', - '错误信息': '' - }) - - # 创建DataFrame并保存 - df_report = pd.DataFrame(report_data) - df_report.to_csv(output_path, index=False, encoding='utf-8-sig') - - # 同时生成文本格式的报告 - txt_output_path = str(Path(output_path).with_suffix('.txt')) - with open(txt_output_path, 'w', encoding='utf-8') as f: - f.write("="*80 + "\n") - f.write("水质参数反演流程执行报告\n") - f.write("="*80 + "\n\n") - - if self.pipeline_start_time and self.pipeline_end_time: - f.write(f"流程开始时间: {datetime.fromtimestamp(self.pipeline_start_time).strftime('%Y-%m-%d %H:%M:%S')}\n") - f.write(f"流程结束时间: {datetime.fromtimestamp(self.pipeline_end_time).strftime('%Y-%m-%d %H:%M:%S')}\n") - f.write(f"总耗时: {self._format_time(self.pipeline_end_time - self.pipeline_start_time)}\n\n") - - f.write("-"*80 + "\n") - f.write("各步骤执行详情:\n") - f.write("-"*80 + "\n\n") - - for step_name in step_order: - if step_name in self.step_timings: - timing_info = self.step_timings[step_name] - f.write(f"{step_name}\n") - f.write(f" 开始时间: {timing_info['start_time']}\n") - f.write(f" 结束时间: {timing_info['end_time']}\n") - f.write(f" 耗时: {timing_info['elapsed_formatted']} ({timing_info['elapsed_seconds']:.2f}秒)\n") - f.write(f" 状态: {timing_info['status']}\n") - if timing_info.get('error'): - f.write(f" 错误: {timing_info['error']}\n") - f.write("\n") - - f.write("-"*80 + "\n") - f.write("统计摘要:\n") - f.write("-"*80 + "\n") - completed_steps = [s for s in self.step_timings.values() if s['status'] == 'completed'] - failed_steps = [s for s in self.step_timings.values() if s['status'] == 'failed'] - skipped_steps = [s for s in self.step_timings.values() if s['status'] == 'skipped'] - - f.write(f"成功完成的步骤: {len(completed_steps)}\n") - f.write(f"失败的步骤: {len(failed_steps)}\n") - f.write(f"跳过的步骤: {len(skipped_steps)}\n") - - if completed_steps: - completed_times = [s['elapsed_seconds'] for s in completed_steps] - f.write(f"平均耗时: {self._format_time(np.mean(completed_times))}\n") - f.write(f"最长耗时: {self._format_time(np.max(completed_times))} ({[s['elapsed_formatted'] for s in completed_steps if s['elapsed_seconds'] == np.max(completed_times)][0]})\n") - f.write(f"最短耗时: {self._format_time(np.min(completed_times))} ({[s['elapsed_formatted'] for s in completed_steps if s['elapsed_seconds'] == np.min(completed_times)][0]})\n") - - print(f"\n流程报告已生成:") - print(f" CSV格式: {output_path}") - print(f" 文本格式: {txt_output_path}") - - return output_path - - def run_full_pipeline(self, config: Dict): - """ - 运行完整流程 - - Args: - config: 配置字典,包含所有步骤的参数 - """ - print("\n" + "="*80) - print("开始运行完整水质参数反演流程") - print("="*80) - - # 记录流程开始时间 - self.pipeline_start_time = time.time() - - try: - # 步骤1: 生成水域mask - if 'step1' in config: - self._notify("步骤1: 水域掩膜生成", "start") - self.step1_generate_water_mask(**config['step1']) - self._notify("步骤1: 水域掩膜生成", "completed", f"(输出: {self.water_mask_path})") - else: - self._notify("步骤1: 水域掩膜生成", "skipped", "未配置") - - # 步骤2: 找到耀斑区域 - # 若后续明确不进行去除耀斑(step3.enabled=False),则跳过步骤2 - step3_enabled = config.get('step3', {}).get('enabled', True) - if 'step2' in config and step3_enabled: - self._notify("步骤2: 耀斑区域检测", "start") - self.step2_find_glint_area(**config['step2']) - self._notify("步骤2: 耀斑区域检测", "completed", f"(输出: {self.glint_mask_path})") - else: - self._notify("步骤2: 耀斑区域检测", "skipped", "去耀斑已禁用或未配置") - - # 步骤3: 去除耀斑 - if 'step3' in config: - self._notify("步骤3: 耀斑去除", "start") - self.step3_remove_glint(**config['step3']) - self._notify("步骤3: 耀斑去除", "completed", f"(输出: {self.deglint_img_path})") - else: - self._notify("步骤3: 耀斑去除", "skipped", "未配置") - - # 步骤4: 处理CSV文件 - if 'step4' in config: - self._notify("步骤4: 数据预处理", "start") - self.step5_process_csv(**config['step4']) - self._notify("步骤4: 数据预处理", "completed", f"(输出: {self.processed_csv_path})") - else: - self._notify("步骤4: 数据预处理", "skipped", "未配置") - - # 步骤5: 提取训练样本点光谱 - if 'step5' in config: - self._notify("步骤5: 光谱提取", "start") - self.step6_extract_spectra(**config['step5']) - self._notify("步骤5: 光谱提取", "completed", f"(输出: {self.training_csv_path})") - else: - self._notify("步骤5: 光谱提取", "skipped", "未配置") - - # 步骤6: 计算水质指数 - if 'step6' in config: - self._notify("步骤6: 水质光谱指数计算", "start") - self.step7_calc_indices(**config['step6']) - self._notify("步骤6: 水质光谱指数计算", "completed", f"(输出: {self.indices_path})") - else: - self._notify("步骤6: 水质光谱指数计算", "skipped", "未配置") - - # 步骤7: 训练模型 - if 'step7' in config: - self._notify("步骤7: 模型训练", "start") - self.step8_train_ml(**config['step7']) - self._notify("步骤7: 模型训练", "completed", f"(输出: {self.models_dir})") - else: - self._notify("步骤7: 模型训练", "skipped", "未配置") - - # 步骤8_non_empirical_modeling: 非经验统计回归模型训练 - if 'step8_non_empirical_modeling' in config: - self._notify("步骤8: 非经验模型训练", "start") - self.step8_non_empirical_modeling(**config['step8_non_empirical_modeling']) - self._notify("步骤8: 非经验模型训练", "completed", f"(输出: {self.models_dir})") - else: - self._notify("步骤8: 非经验模型训练", "skipped", "未配置") - - # 步骤9: 自定义回归分析 - if 'step9' in config: - self._notify("步骤9: 自定义回归", "start") - self.step9_custom_regression(**config['step9']) - self._notify("步骤9: 自定义回归", "completed", f"(输出: {self.custom_regression_path})") - else: - self._notify("步骤9: 自定义回归", "skipped", "未配置") - - # 步骤10: 生成预测采样点 - if 'step10' in config: - self._notify("步骤10: 采样点生成", "start") - sampling_csv_path = self.step4_sampling(**config['step10']) - self._notify("步骤10: 采样点生成", "completed", f"(输出: {sampling_csv_path})") - else: - sampling_csv_path = None - self._notify("步骤10: 采样点生成", "skipped", "未配置") - - # 步骤11_ml: 预测水质参数 - if 'step11_ml' in config and sampling_csv_path: - self._notify("步骤11: 参数预测", "start") - step11_ml_config = config['step11_ml'].copy() - step11_ml_config['sampling_csv_path'] = sampling_csv_path - prediction_files = self.step9_predict_ml(**step11_ml_config) - self._notify("步骤11: 参数预测", "completed", f"(生成{len(prediction_files)}个预测文件)") - else: - prediction_files = {} - self._notify("步骤11: 参数预测", "skipped", "未配置或缺少采样点") - - # 步骤11: 使用非经验模型进行参数预测 - non_empirical_prediction_files = {} - if 'step11' in config and sampling_csv_path: - self._notify("步骤11: 非经验模型预测", "start") - step11_config = config['step11'].copy() - step11_config['sampling_csv_path'] = sampling_csv_path - non_empirical_prediction_files = self.step11_non_empirical_prediction(**step11_config) - self._notify("步骤11: 非经验模型预测", "completed", f"(生成{len(non_empirical_prediction_files)}个预测文件)") - else: - self._notify("步骤11: 非经验模型预测", "skipped", "未配置或缺少采样点") - - # 步骤12: 使用自定义回归模型进行参数预测 - custom_regression_prediction_files = {} - if 'step12' in config and sampling_csv_path: - self._notify("步骤12: 自定义回归预测", "start") - step12_config = config['step12'].copy() - step12_config['sampling_csv_path'] = sampling_csv_path - custom_regression_prediction_files = self.step12_custom_regression_prediction(**step12_config) - self._notify("步骤12: 自定义回归预测", "completed", f"(生成{len(custom_regression_prediction_files)}个预测文件)") - else: - self._notify("步骤12: 自定义回归预测", "skipped", "未配置或缺少采样点") - - # 合并机器学习预测、非经验模型预测和自定义回归预测结果 - all_prediction_files = {**prediction_files, **non_empirical_prediction_files, **custom_regression_prediction_files} - - # 步骤14: 生成分布图 - distribution_maps = {} - if 'step14' in config and all_prediction_files: - self._notify("步骤14: 分布图生成", "start") - for target_name, pred_file in all_prediction_files.items(): - step14_config = config['step14'].copy() - for _k in ('step9_batch_mode', 'prediction_csv_dir', 'recursive_csv_scan'): - step14_config.pop(_k, None) - step14_config['prediction_csv_path'] = pred_file - if 'output_image_path' not in step14_config: - step14_config['output_image_path'] = None - dist_map_path = self.step10_map(**step14_config) - distribution_maps[target_name] = dist_map_path - self._notify("步骤14: 分布图生成", "completed", f"(生成{len(distribution_maps)}个分布图)") - else: - self._notify("步骤14: 分布图生成", "skipped", "未配置或缺少预测结果") - - # 生成可视化图表 - output_files = {} - pipeline_info = { - 'work_dir': str(self.work_dir), - 'models_dir': str(self.models_dir), - 'prediction_files': all_prediction_files, - 'output_files': {} - } - - # 生成散点图 - if 'visualization' in config and config['visualization'].get('generate_scatter', True): - if self.training_csv_path and self.models_dir.exists(): - try: - self._notify("可视化", "info", "生成模型评估散点图...") - scatter_config = config['visualization'].get('scatter_config', {}) - scatter_paths = self.generate_model_scatter_plots( - metric=scatter_config.get('metric', 'test_r2'), - use_enhanced=scatter_config.get('use_enhanced', True), - feature_start_column=scatter_config.get('feature_start_column', - config.get('step6', {}).get('feature_start_column', 13)), - test_size=scatter_config.get('test_size', 0.2), - random_state=scatter_config.get('random_state', 42) - ) - output_files['scatter_plots'] = scatter_paths - pipeline_info['output_files']['scatter_plots'] = scatter_paths - self._notify("可视化", "info", f"已生成 {len(scatter_paths)} 个散点图") - except Exception as e: - self._notify("可视化", "warning", f"生成散点图时出错: {e}") - - # 生成箱型图 - if 'visualization' in config and config['visualization'].get('generate_boxplots', True): - if self.processed_csv_path: - try: - self._notify("可视化", "info", "生成水质参数箱型图...") - boxplot_config = config['visualization'].get('boxplot_config', {}) - boxplot_paths = self.generate_boxplots( - parameter_columns=boxplot_config.get('parameter_columns', None), - data_start_column=boxplot_config.get('data_start_column', 4), - save_individual=boxplot_config.get('save_individual', True), - use_seaborn=boxplot_config.get('use_seaborn', True) - ) - output_files['boxplots'] = boxplot_paths - pipeline_info['output_files']['boxplots'] = boxplot_paths - self._notify("可视化", "info", f"已生成 {len(boxplot_paths)} 个箱型图") - except Exception as e: - self._notify("可视化", "warning", f"生成箱型图时出错: {e}") - - # 生成光谱曲线图 - if 'visualization' in config and config['visualization'].get('generate_spectrum', True): - if self.training_csv_path: - try: - self._notify("可视化", "info", "生成光谱曲线对比图...") - spectrum_paths = self.generate_spectrum_comparison_plots( - wavelength_start_column=config.get('step6', {}).get('feature_start_column', 'UTM_Y') - ) - output_files['spectrum_plots'] = spectrum_paths - pipeline_info['output_files']['spectrum_plots'] = spectrum_paths - self._notify("可视化", "info", f"已生成 {len(spectrum_paths)} 个光谱曲线图") - except Exception as e: - self._notify("可视化", "warning", f"生成光谱曲线图时出错: {e}") - - # 生成统计图表 - if 'visualization' in config and config['visualization'].get('generate_statistics', True): - if self.processed_csv_path: - try: - self._notify("可视化", "info", "生成统计图表...") - stat_charts = self.generate_statistical_charts() - output_files['statistical_charts'] = stat_charts - pipeline_info['output_files']['statistical_charts'] = stat_charts - self._notify("可视化", "info", "已生成统计图表") - except Exception as e: - self._notify("可视化", "warning", f"生成统计图表时出错: {e}") - - # 生成耀斑分析影像预览图 - if 'visualization' in config and config['visualization'].get('generate_glint_previews', True): - try: - self._notify("可视化", "info", "生成耀斑分析影像预览图...") - glint_preview_config = config['visualization'].get('glint_preview_config', {}) - preview_paths = self.generate_glint_deglint_previews( - work_dir=glint_preview_config.get('work_dir'), - output_subdir=glint_preview_config.get('output_subdir', 'glint_deglint_previews'), - generate_glint=glint_preview_config.get('generate_glint', True), - generate_deglint=glint_preview_config.get('generate_deglint', True) - ) - output_files['glint_deglint_previews'] = preview_paths - pipeline_info['output_files']['glint_deglint_previews'] = preview_paths - self._notify("可视化", "info", f"已生成 {len(preview_paths)} 个耀斑分析预览图") - except Exception as e: - self._notify("可视化", "warning", f"生成耀斑分析预览图时出错: {e}") - - # 生成批量处理摘要 - try: - step1_output = str(self.water_mask_path) if self.water_mask_path else 'N/A' - pipeline_info['step1'] = {'status': 'completed', 'output_file': step1_output} - pipeline_info['step2'] = {'status': 'completed', 'output_file': str(self.glint_mask_path) if self.glint_mask_path else 'N/A'} - pipeline_info['step3'] = {'status': 'completed', 'output_file': str(self.deglint_img_path) if self.deglint_img_path else 'N/A'} - pipeline_info['step4'] = {'status': 'completed', 'output_file': str(self.processed_csv_path) if self.processed_csv_path else 'N/A'} - pipeline_info['step5'] = {'status': 'completed', 'output_file': str(self.training_csv_path) if self.training_csv_path else 'N/A'} - pipeline_info['step6'] = {'status': 'completed', 'output_file': str(self.indices_path) if self.indices_path else 'N/A'} - pipeline_info['step7'] = {'status': 'completed', 'output_file': str(self.models_dir)} - pipeline_info['step9'] = {'status': 'completed', 'output_file': str(self.custom_regression_path) if self.custom_regression_path else 'N/A'} - pipeline_info['training_params'] = config.get('step7', {}) - - summary_path = self.report_generator.generate_batch_inference_summary(pipeline_info) - print(f"批量处理摘要已生成: {summary_path}") - output_files['batch_summary'] = summary_path - except Exception as e: - print(f"生成批量处理摘要时出错: {e}") - - # 记录流程结束时间 - self.pipeline_end_time = time.time() - - # 生成流程执行报告 - try: - report_path = self.generate_pipeline_report() - output_files['pipeline_report'] = report_path - except Exception as e: - print(f"生成流程报告时出错: {e}") - - print("\n" + "="*80) - print("完整流程执行完成!") - print("="*80) - - # 显示总耗时 - if self.pipeline_start_time and self.pipeline_end_time: - total_time = self.pipeline_end_time - self.pipeline_start_time - print(f"总耗时: {self._format_time(total_time)}") - - print(f"\n所有输出文件:") - for key, value in output_files.items(): - if isinstance(value, dict): - print(f" {key}: {len(value)} 个文件") - else: - print(f" {key}: {value}") - - except Exception as e: - # 即使失败也记录结束时间 - self.pipeline_end_time = time.time() - - # 尝试生成报告(即使失败) - try: - report_path = self.generate_pipeline_report() - print(f"\n流程报告已生成: {report_path}") - except: - pass - - print(f"\n流程执行失败: {e}") - import traceback - traceback.print_exc() - raise - - def step8_non_empirical_modeling(self, csv_path: Optional[str] = None, - preprocessing_methods: List[str] = None, - algorithms: List[str] = None, - value_cols: Union[int, Dict[str, int]] = 0, - spectral_start_col: int = 1, - spectral_end_col: Optional[int] = None, - window: int = 5, - output_dir: Optional[str] = None, - enabled: bool = True, - skip_dependency_check: bool = False, **kwargs) -> Dict[str, str]: - """ - 步骤6.5: 非经验统计回归模型训练 - - Args: - csv_path: 训练数据CSV路径(如果为None,使用步骤5的结果) - preprocessing_methods: 预处理方法列表 - algorithms: 算法名称列表(chl_a, nh3, mno4, tn, tp, tss) - value_cols: 实测值列索引,可以是单个整数(所有算法使用同一列)或字典(键为算法名,值为列索引) - spectral_start_col: 光谱数据起始列索引 - spectral_end_col: 光谱数据结束列索引(如果为None,自动检测) - window: 窗口大小 - output_dir: 输出目录路径(如果为None,使用默认目录) - - Returns: - 模型文件路径字典(键为算法名) - """ - # 参数解析(保留原逻辑) - if csv_path is not None: - final_csv_path = csv_path - elif self.training_csv_path is not None: - final_csv_path = self.training_csv_path - else: - final_csv_path = None - - self._notify("started", "步骤6.5: 非经验统计回归模型训练") - result = ModelingStep.train_non_empirical_models( - csv_path=final_csv_path, - preprocessing_methods=preprocessing_methods, - algorithms=algorithms, - value_cols=value_cols, - spectral_start_col=spectral_start_col, - spectral_end_col=spectral_end_col, - window=window, - output_dir=output_dir, - enabled=enabled, - ) - self._record_step_time("步骤6.5: 非经验模型训练", 0, 0) - self._notify("completed", f"非经验模型训练完成") - return result - - def step9_custom_regression(self, - csv_path: Optional[str] = None, - x_columns: Optional[Union[str, List[str]]] = None, - y_columns: Optional[Union[str, List[str]]] = None, - methods: Union[str, List[str]] = 'all', - output_dir: Optional[str] = None, - enabled: bool = True, - skip_dependency_check: bool = False, **kwargs) -> str: - """ - 步骤6.75: 使用自定义回归方法分析指标与目标参数之间的关系 - """ - # 参数解析(保留原逻辑) - if csv_path is not None: - input_csv = csv_path - elif self.indices_path is not None: - input_csv = self.indices_path - else: - input_csv = None - - self._notify("started", "步骤6.75: 自定义回归分析") - result = ModelingStep.custom_regression( - csv_path=input_csv, - x_columns=x_columns, - y_columns=y_columns, - methods=methods, - output_dir=output_dir, - enabled=enabled, - work_dir=str(self.work_dir), - ) - self.custom_regression_path = result - self._record_step_time("步骤6.75: 自定义回归", 0, 0) - self._notify("completed", f"自定义回归结果已保存到目录: {result}") - return result - def _apply_preprocessing(self, csv_path: str, preprocess_method: str, output_dir: Path, spectral_start_col: int = 4) -> str: - """ - 应用预处理到CSV数据 - - Args: - csv_path: 原始CSV文件路径 - preprocess_method: 预处理方法名称 - output_dir: 输出目录 - spectral_start_col: 光谱数据起始列索引(0-based) - - Returns: - 预处理后的CSV文件路径 - """ - # 兼容中文和各种格式 - raw_p = str(preprocess_method).lower() - if raw_p == 'none' or '无' in raw_p or '跳过' in raw_p: - preprocess_method = 'None' - elif raw_p == 'mms' or 'minmax' in raw_p or '最大最小' in raw_p: - preprocess_method = 'MMS' - elif raw_p == 'ss' or '标准' in raw_p or '标准化' in raw_p: - preprocess_method = 'SS' - elif raw_p == 'snv' or '标准正态' in raw_p: - preprocess_method = 'SNV' - elif raw_p == 'ma' or '移动' in raw_p: - preprocess_method = 'MA' - elif raw_p == 'sg' or 'savitzky' in raw_p or '平滑' in raw_p: - preprocess_method = 'SG' - elif raw_p == 'msc' or '多元散射' in raw_p: - preprocess_method = 'MSC' - elif raw_p == 'd1' or 'd2' or 'dt' or '导数' in raw_p: - preprocess_method = {'d1': 'D1', 'd2': 'D2', 'dt': 'DT'}.get(raw_p, raw_p.upper()) - elif raw_p == 'ct' or '去趋势' in raw_p: - preprocess_method = 'CT' - else: - preprocess_method = preprocess_method # 保持原值 - - # 如果不需要预处理,直接返回原文件 - if preprocess_method == 'None': - return csv_path - - # 生成预处理后的文件路径 - output_filename = f"preprocessed_{preprocess_method}.csv" - output_path = str(output_dir / output_filename) - - # 检查是否已存在预处理文件 - if Path(output_path).exists(): - print(f"检测到已存在的预处理文件,直接使用: {output_path}") - return output_path - - # 读取原始数据 - df = pd.read_csv(csv_path) - - # 分离坐标列和参数列(前几列)与光谱数据列 - non_spectral_cols = df.iloc[:, :spectral_start_col] # 光谱数据前的列 - spectral_data = df.iloc[:, spectral_start_col:] # 光谱数据列 - - # 应用预处理 - 使用spectral_Preprocessing模块 - from src.preprocessing.spectral_Preprocessing import Preprocessing - - # 为SS预处理提供scaler保存路径,保存在工作目录的8_Supervised_Model_Training中 - save_path = None - if preprocess_method == 'SS': - models_dir = output_dir.parent.parent / "8_Supervised_Model_Training" # 向上两级到工作目录 - models_dir.mkdir(parents=True, exist_ok=True) - save_path = str(models_dir / "scaler_params.pkl") - print(f"SS预处理: scaler模型将保存到 {save_path}") - - # 调用预处理函数(为SS方法传递save_path) - processed_spectral = Preprocessing(preprocess_method, spectral_data, save_path=save_path) - - # 重新组合数据 - if isinstance(processed_spectral, pd.DataFrame): - processed_df = pd.concat([non_spectral_cols, processed_spectral], axis=1) - else: - # 如果是numpy数组,转换为DataFrame - processed_spectral_df = pd.DataFrame(processed_spectral, - columns=spectral_data.columns, - index=spectral_data.index) - processed_df = pd.concat([non_spectral_cols, processed_spectral_df], axis=1) - - # 保存预处理后的数据 - processed_df.to_csv(output_path, index=False) - print(f"预处理完成: {output_path}") - - return output_path - - def _generate_non_empirical_summary(self, model_results: Dict[str, str], output_dir: Path) -> str: - """ - 生成非经验模型训练结果汇总CSV - - Args: - model_results: 模型文件路径字典 - output_dir: 输出目录 - - Returns: - 汇总CSV文件路径 - """ - summary_path = str(output_dir / "non_empirical_models_summary.csv") - - summary_data = [] - - for model_key, model_path in model_results.items(): - try: - # 从文件名解析预处理方法和算法名 - parts = model_key.split('_') - preprocess_method = parts[0] - algorithm_name = '_'.join(parts[1:]) if len(parts) > 2 else parts[1] - - # 读取JSON模型文件 - with open(model_path, 'r', encoding='utf-8') as f: - model_info = json.load(f) - - # 提取模型信息 - summary_row = { - 'Preprocessing Method': preprocess_method, - 'Algorithm Name': algorithm_name, - 'Model Type': model_info.get('model_type', ''), - 'Coefficient Count': len(model_info.get('model_info', [])), - 'Average Accuracy(%)': np.mean(model_info.get('accuracy', [0])) if model_info.get('accuracy') else 0, - 'Min Accuracy(%)': np.min(model_info.get('accuracy', [0])) if model_info.get('accuracy') else 0, - 'Max Accuracy(%)': np.max(model_info.get('accuracy', [0])) if model_info.get('accuracy') else 0, - 'Sample Count': len(model_info.get('long', [])), - 'Model File': model_path - } - - # 添加系数信息(前几个系数) - coefficients = model_info.get('model_info', []) - for i, coeff in enumerate(coefficients[:5]): # 只显示前5个系数 - summary_row[f'系数_{i+1}'] = coeff - - summary_data.append(summary_row) - - except Exception as e: - print(f"读取模型文件 {model_path} 时出错: {e}") - continue - - if summary_data: - # 创建DataFrame并保存 - df_summary = pd.DataFrame(summary_data) - df_summary.to_csv(summary_path, index=False, encoding='utf-8-sig') - print(f"汇总文件已生成: {summary_path}") - else: - print("警告: 没有有效的模型数据可汇总") - summary_path = "" - - return summary_path - - def step11_non_empirical_prediction(self, sampling_csv_path: str, - non_empirical_models_dir: Optional[str] = None, - output_path: Optional[str] = None, - metric: str = 'Average Accuracy(%)', - prediction_column: str = 'prediction', - enabled: bool = True, - skip_dependency_check: bool = False, **kwargs) -> Dict[str, str]: - """ - 步骤11: 使用非经验统计回归模型进行参数预测 - - 根据非经验模型训练结果汇总CSV筛选给定方法的准确率最高的模型,使用该模型进行预测 - - Args: - sampling_csv_path: 采样点光谱数据CSV路径 - non_empirical_models_dir: 非经验模型保存目录(如果为None,使用步骤8的结果) - output_path: 输出目录路径(如果为None,使用默认目录) - metric: 选择最佳模型的指标(默认使用平均准确率) - prediction_column: 预测结果列名 - - Returns: - 预测结果文件路径字典(键为算法名) - """ - self._notify("started", "步骤11: 使用非经验模型进行参数预测") - result = PredictionStep.predict_with_non_empirical_models( - sampling_csv_path=sampling_csv_path, - non_empirical_models_dir=non_empirical_models_dir, - output_dir=output_path, - metric=metric, - prediction_column=prediction_column, - enabled=enabled, - work_dir=str(self.work_dir), - ) - self._record_step_time("步骤11: 非经验模型预测", 0, 0) - self._notify("completed", f"非经验模型预测完成,结果保存在: {self.prediction_dir}") - return result - - def step12_custom_regression_prediction(self, sampling_csv_path: str, - custom_regression_dir: Optional[str] = None, - formula_csv_path: Optional[str] = None, - coordinate_columns: Optional[List[str]] = None, - output_dir: Optional[str] = None, - filename_prefix: str = "custom_regression_prediction", - enabled: bool = True, - skip_dependency_check: bool = False, **kwargs) -> Dict[str, str]: - """ - 步骤12: 使用自定义回归模型进行参数预测 - - 使用新的CustomRegressionPredictor模块,基于13_Custom_Regression文件夹中的CSV, - 根据r_squared选择最佳模型,批量预测水质参数 - - Args: - sampling_csv_path: 采样点光谱数据CSV路径(来自步骤10) - custom_regression_dir: 自定义回归模型目录(13_Custom_Regression) - formula_csv_path: 公式CSV文件路径,用于查找index_formula - coordinate_columns: 坐标列名列表,默认为['longitude', 'latitude']或自动识别 - output_dir: 输出目录,默认为prediction_dir - filename_prefix: 输出文件名前缀 - enabled: 是否启用 - skip_dependency_check: 是否跳过依赖检查 - - Returns: - 预测结果文件路径字典(键为参数名) - """ - self._notify("started", "步骤12: 使用自定义回归模型进行参数预测") - result = PredictionStep.predict_with_custom_regression( - sampling_csv_path=sampling_csv_path, - custom_regression_dir=custom_regression_dir, - formula_csv_path=formula_csv_path, - coordinate_columns=coordinate_columns, - output_dir=output_dir, - filename_prefix=filename_prefix, - enabled=enabled, - work_dir=str(self.work_dir), - ) - self._record_step_time("步骤12: 自定义回归模型预测", 0, 0) - self._notify("completed", f"自定义回归预测完成") - return result - - -def main(): - """主函数示例""" - parser = argparse.ArgumentParser(description="水质参数反演框架主程序") - parser.add_argument('--config', type=str, help='配置文件路径(JSON格式)') - parser.add_argument('--work_dir', type=str, default='./work_dir', help='工作目录') - parser.add_argument('--mode', type=str, choices=['full', 'step'], - default='full', help='运行模式:full(完整流程)或step(单步执行)') - - args = parser.parse_args() - - # 创建管道实例 - pipeline = WaterQualityInversionPipeline(work_dir=args.work_dir) - - # 示例配置(用户可以根据实际情况修改) - example_config = { - 'step1': { - 'mask_path': r"D:\BaiduNetdiskDownload\yaobao\roi\roi.shp", # 支持shp或dat格式,如果是shp需要提供img_path - 'img_path': r"D:\BaiduNetdiskDownload\yaobao\result3.bsq", # 当mask_path为shp格式时必须提供,用于栅格化 - }, - 'step2': { - 'img_path': r"D:\BaiduNetdiskDownload\yaobao\result3.bsq", - 'glint_wave': 550.0, - 'method': 'otsu', - 'max_area' :50, # 过滤掉面积超过10000像素的连通域 - 'buffer_size' : 10 # 可选: 'otsu', 'zscore', 'percentile', 'iqr', 'adaptive', 'multi_band' - # 'z_threshold': 2.5, # zscore方法使用 - # 'percentile': 95.0, # percentile和adaptive方法使用 - # 'iqr_multiplier': 1.5, # iqr方法使用 - # 'window_size': 15, # adaptive方法使用 - # 'multi_band_waves': [750, 800, 850], # multi_band方法使用 - # 'sub_method': 'zscore', # multi_band方法的子方法 - # 'weights': None, # multi_band方法的权重 - # 'max_area': 10000, # 最大连通域面积阈值(像素数),用于去除岸边、浅水、水华等大面积区域(None表示不过滤) - # 'buffer_size': 50, # 岸边缓冲区大小(像素数),用于去除岸边附近的错误耀斑掩膜(None表示不启用) - }, - 'step3': { - 'img_path': r"D:\BaiduNetdiskDownload\yaobao\result3.bsq", - 'method': 'goodman', # 可选: 'subtract_nir', 'regression_slope', 'oxygen_absorption', - # 'kutser', 'goodman', 'hedley', 'sugar' - 'enabled': True, # 是否执行去除耀斑;False 时跳过并直接使用原始影像 - # 0值像素插值参数(可选): - 'interpolate_zeros': False, # 是否对0值像素进行插值(默认False) - 'interpolation_method': 'bilinear', # 插值方法: 'nearest'(邻近), 'bilinear'(双线性), - # 'spline'(样条), 'kriging'(克里金) - # 水域掩膜参数(可选): - 'water_mask_path':r"D:\BaiduNetdiskDownload\yaobao\roi\roi.shp", # None表示自动使用步骤1生成的掩膜,也可以提供: - # # - numpy数组 - # # - 栅格文件路径(.dat/.tif) - # # - shapefile路径(.shp) - # 'start_wave': 780.0, # subtract_nir和regression_slope方法需要 - # 'end_wave': 850.0, # subtract_nir和regression_slope方法需要 - # 'json_path': 'path/to/roi.json' # regression_slope方法需要 - # Kutser方法参数示例: - # 'kutser_shp_path': 'path/to/deep_water.shp', # 已废弃,请使用water_mask参数 - # 'oxy_band': 38, # 氧吸收波段索引 - # 'lower_oxy': 36, # 氧吸收下波段索引 - # 'upper_oxy': 49, # 氧吸收上波段索引 - # 'nir_band': 47, # NIR波段索引 - # Goodman方法参数示例: - 'nir_lower': 65, # NIR下波段索引 - 'nir_upper': 91, # NIR上波段索引 - 'goodman_A': 0.000019, # Goodman参数A - 'goodman_B': 0.1, # Goodman参数B - # Hedley方法参数示例: - # 'hedley_shp_path': 'path/to/deep_water.shp', # 已废弃,请使用water_mask参数 - # 'hedley_nir_band': 47, # NIR波段索引 - # SUGAR方法参数示例: - # 'sugar_bounds': [(1, 2)], # 优化边界 - # 'sugar_sigma': 1.0, # LoG平滑sigma - # 'sugar_estimate_background': True, # 是否估计背景光谱 - # 'sugar_glint_mask_method': 'cdf', # 'cdf'或'otsu' - # 'sugar_iter': 1, # 迭代次数,None表示自动终止 - # 'sugar_termination_thresh': 20.0 # 终止阈值 - }, - 'step4': { - 'csv_path': r"D:\BaiduNetdiskDownload\yaobao\csv\input.csv"#水质参数原始文件 - }, - 'step5': { - 'radius': 5, - 'source_epsg': 4326, - # 单步运行时建议显式指定;完整流程中可省略,将使用步骤2输出的耀斑掩膜 - # 'glint_mask_path': r"path/to/severe_glint_area.dat", - }, - 'step6': { - 'formula_csv_file': 'path/to/water_quality_formulas.csv', # 公式CSV文件路径 - 'formula_names': ['Al10SABI', 'TurbBe16RedOverViolet'], # 要计算的公式名称列表 - 'output_filename': 'training_spectra_indices.csv', - 'enabled': True # 是否启用水质指数计算 - }, - 'step7': { - 'feature_start_column': '374.285004', - 'preprocessing_methods': ['None', 'MMS', 'SS', 'SNV', 'MA', 'SG', 'MSC', 'D1', 'D2', 'DT', 'CT'], - 'model_names': ['SVR', 'RF', 'Ridge', 'Lasso'], - 'split_methods': ['spxy', 'ks', 'random'], - 'cv_folds': 3 - }, - 'step8_non_empirical_modeling': { - 'preprocessing_methods': ['None', 'MMS', 'SS', 'SNV', 'MA', 'SG', 'MSC', 'D1', 'D2', 'DT', 'CT'], - 'algorithms': ['chl_a', 'nh3', 'mno4', 'tn', 'tp', 'tss'], - 'value_cols': 0, # 可以是单个整数或字典,如 {'chl_a': 0, 'nh3': 1, 'mno4': 2, 'tn': 3, 'tp': 4, 'tss': 5} - 'spectral_start_col': 1, - 'window': 5, - 'enabled': True # 是否启用非经验模型训练 - }, - 'step9': { - 'x_columns': ['NDWI', 'NDVI'], # 自变量列名列表 - 'y_columns': ['chl_a', 'tn', 'tp'], # 因变量列名列表 - 'methods': 'all', # 回归方法 - 'output_dir': 'custom_regression_results', # 输出目录 - 'enabled': True # 是否启用自定义回归分析 - }, - 'step10': { - 'interval': 50, - 'sample_radius': 5, - 'chunk_size': 1000, - 'water_mask_path': None, # 若为None,将自动使用步骤1生成的dat水体掩膜 - # 可选:耀斑掩膜文件(dat),若不提供将使用步骤2结果;需要外部指定时取消注释 - # 'glint_mask_path': r"D:\path\to\severe_glint_area.dat", - }, - 'step11_ml': { - 'metric': 'test_r2', - 'prediction_column': 'prediction' - }, - 'step11': { - 'metric': 'Average Accuracy(%)', # 选择最佳模型的指标 - 'prediction_column': 'prediction', - 'enabled': True # 是否启用非经验模型预测 - }, - 'step12': { - 'custom_regression_dir': None, # 自定义回归模型目录(None表示使用13_Custom_Regression) - 'formula_csv_path': None, # 公式CSV文件路径,用于查找index_formula(如water_quality_formulas.csv) - 'coordinate_columns': None, # 坐标列名(None表示自动识别) - 'output_dir': None, # 输出目录(None表示使用prediction_dir) - 'filename_prefix': 'custom_regression_prediction', # 输出文件名前缀 - 'enabled': True # 是否启用自定义回归预测 - }, - 'step14': { - 'boundary_shp_path': r"D:\BaiduNetdiskDownload\yaobao\roi\roi.shp" , - 'resolution': 30, - 'input_crs': 'EPSG:32651', - 'output_crs': 'EPSG:4326', - # 可选参数,若不需要可删除 - 'show_sample_points': False, - 'base_map_tif': None, - 'use_distance_diffusion': True, - 'max_diffusion_distance': None, - 'diffusion_power': 2, - 'diffusion_n_neighbors': 15, - 'cmap': None, - 'expand_ratio': 0.05 - }, - 'visualization': { - 'generate_scatter': True, # 是否生成散点图 - 'generate_boxplots': True, # 是否生成箱型图 - 'generate_spectrum': True, # 是否生成光谱曲线图 - 'generate_statistics': True, # 是否生成统计图表 - 'generate_glint_previews': True, # 是否生成2_Glint_Detection和3_deglint的PNG预览图 - 'scatter_config': { - 'metric': 'test_r2', # 选择最佳模型的指标 - 'use_enhanced': True, # 是否使用增强版散点图(带置信区间) - 'feature_start_column': 13, # 特征开始列索引 - 'test_size': 0.2, # 测试集比例 - 'random_state': 42 # 随机种子 - }, - 'boxplot_config': { - 'parameter_columns': None, # 参数列名列表(None表示自动检测) - 'data_start_column': 4, # 数据开始列索引(从第5列开始,索引为4) - 'save_individual': True, # 是否为每个参数单独保存箱型图 - 'use_seaborn': True # 是否使用seaborn绘制(更美观) - }, - 'glint_preview_config': { - 'work_dir': None, # 工作目录(None表示使用pipeline的工作目录) - 'output_subdir': 'glint_deglint_previews', # 输出子目录 - 'generate_glint': True, # 是否处理2_Glint_Detection文件夹 - 'generate_deglint': True # 是否处理3_deglint文件夹 - } - } - } - - if args.config: - # 从配置文件加载(需要实现JSON加载逻辑) - import json - with open(args.config, 'r', encoding='utf-8') as f: - config = json.load(f) - else: - # 使用示例配置(用户需要修改) - config = example_config - print("警告: 使用示例配置,请根据实际情况修改配置参数") - - if args.mode == 'full': - pipeline.run_full_pipeline(config) - else: - print("单步执行模式,请直接调用对应的step方法") - print("例如: pipeline.step1_generate_water_mask(...)") - - -def example_independent_steps(): - """ - 示例:展示如何独立运行各个步骤 - - 这个函数展示了如何在不依赖完整流水线的情况下,单独运行各个步骤。 - 每个步骤都可以通过提供必要的参数独立执行。 - """ - print("水质参数反演流水线 - 独立步骤运行示例") - print("="*80) - - # 创建流水线实例 - pipeline = WaterQualityInversionPipeline(work_dir="./example_work_dir") - - try: - # 示例1: 独立运行步骤1 - 生成水域掩膜 - print("\n示例1: 独立运行步骤1 - 生成水域掩膜") - try: - water_mask_path = pipeline.step1_generate_water_mask( - mask_path="path/to/water_mask.shp", # 或者 .dat/.tif 文件 - img_path="path/to/image.bsq" - ) - print(f"水域掩膜已生成: {water_mask_path}") - except Exception as e: - print(f"步骤1失败: {e}") - - # 示例2: 独立运行步骤2 - 耀斑区域检测 - print("\n示例2: 独立运行步骤2 - 耀斑区域检测") - try: - glint_mask_path = pipeline.step2_find_glint_area( - img_path="path/to/image.bsq", - water_mask_path="path/to/water_mask.dat", # 可选,提供水体掩膜 - skip_dependency_check=True # 允许跳过依赖检查 - ) - print(f"耀斑掩膜已生成: {glint_mask_path}") - except Exception as e: - print(f"步骤2失败: {e}") - - # 示例3: 独立运行步骤4 - 数据预处理 - print("\n示例3: 独立运行步骤4 - 数据预处理") - try: - processed_csv = pipeline.step5_process_csv( - csv_path="path/to/water_quality_data.csv" - ) - print(f"处理后的CSV文件: {processed_csv}") - except Exception as e: - print(f"步骤4失败: {e}") - - # 示例4: 独立运行步骤5 - 光谱提取 - print("\n示例4: 独立运行步骤5 - 光谱提取") - try: - training_spectra = pipeline.step6_extract_spectra( - deglint_img_path="path/to/deglint_image.bsq", - csv_path="path/to/processed_data.csv", - glint_mask_path="path/to/severe_glint_area.dat", - skip_dependency_check=True - ) - print(f"训练光谱数据: {training_spectra}") - except Exception as e: - print(f"步骤5失败: {e}") - - # 示例5: 独立运行步骤6 - 模型训练 - print("\n示例5: 独立运行步骤6 - 模型训练") - try: - models_dir = pipeline.step6_train_models( - training_csv_path="path/to/training_spectra.csv", - skip_dependency_check=True - ) - print(f"模型目录: {models_dir}") - except Exception as e: - print(f"步骤6失败: {e}") - - # 示例6: 独立运行步骤10 - 采样点生成 - print("\n示例6: 独立运行步骤10 - 采样点生成") - try: - sampling_csv = pipeline.step4_sampling( - deglint_img_path="path/to/deglint_image.bsq", - water_mask_path="path/to/water_mask.dat", - skip_dependency_check=True - ) - print(f"采样点数据: {sampling_csv}") - except Exception as e: - print(f"步骤10失败: {e}") - - # 示例7: 独立运行步骤11 - 水质预测 - print("\n示例7: 独立运行步骤11 - 水质预测") - try: - predictions = pipeline.step9_predict_ml( - sampling_csv_path="path/to/sampling_spectra.csv", - models_dir="path/to/models_directory", - skip_dependency_check=True - ) - print(f"预测结果: {predictions}") - except Exception as e: - print(f"步骤11失败: {e}") - - # 示例8: 独立运行步骤14 - 分布图生成 - print("\n示例8: 独立运行步骤14 - 分布图生成") - try: - distribution_map = pipeline.step10_map( - prediction_csv_path="path/to/prediction_results.csv", - boundary_shp_path="path/to/boundary.shp", - skip_dependency_check=True - ) - print(f"分布图: {distribution_map}") - except Exception as e: - print(f"步骤14失败: {e}") - - print("\n" + "="*80) - print("独立步骤运行示例完成") - print("注意:请将示例中的路径替换为实际的文件路径") - - except Exception as e: - print(f"运行示例时出错: {e}") - import traceback - traceback.print_exc() - - -if __name__ == "__main__": - import sys - if len(sys.argv) > 1 and sys.argv[1] == "--example": - example_independent_steps() - else: - main() - diff --git a/src/gui/core/pipeline_executor.py b/src/gui/core/pipeline_executor.py index cfb6470..57200a7 100644 --- a/src/gui/core/pipeline_executor.py +++ b/src/gui/core/pipeline_executor.py @@ -26,6 +26,7 @@ Pipeline 执行器 import os import copy +import traceback from pathlib import Path from typing import Dict, List, Optional @@ -74,6 +75,9 @@ class PipelineExecutor(QObject): self._workspace_initializer = workspace_initializer self._worker: Optional[WorkerThread] = None + # 订阅面板发出的单步执行请求(解耦面板与执行器) + global_event_bus.subscribe('RequestRunSingleStep', self._on_request_run_single_step) + # ═══════════════════════════════════════════════════════════ # 公开 API # ═══════════════════════════════════════════════════════════ @@ -98,26 +102,60 @@ class PipelineExecutor(QObject): 6. 获取配置 + 模式裁剪 7. 一次性全预检 + 用户交互 8. 确认执行 → 创建 WorkerThread → 启动 + + 关键防静默失败设计: + - 每一个 return 前必须通过 EventBus 发布 LogMessage + - 整个方法体包裹在 try/except 中,防止 PyQt5 槽函数静默吞异常 """ + print("==== [探针] run_full_pipeline 方法体已进入 ====", flush=True) + try: + self._run_full_pipeline_impl() + except Exception as e: + err_detail = traceback.format_exc() + global_event_bus.publish('LogMessage', { + 'message': f'[致命错误] run_full_pipeline 异常: {e}', + 'level': 'error', + }) + global_event_bus.publish('LogMessage', { + 'message': f'详细追踪:\n{err_detail}', + 'level': 'error', + }) + QMessageBox.critical( + self.parent(), "运行失败", + f"启动流程时发生未预期的错误:\n\n{e}\n\n详细信息已输出到日志区。" + ) + + def _run_full_pipeline_impl(self): + """run_full_pipeline 的实现体,由外层 try/except 保护。""" + # ★ 终端即时反馈:确保即使 EventBus/日志区未就绪也能看到 + print("\n[PipelineExecutor] 收到「运行完整流程」指令,开始执行...") + if not PIPELINE_AVAILABLE: global_event_bus.publish('LogMessage', { 'message': '无法导入 Pipeline 模块,请检查项目文件结构!', 'level': 'error', }) - # 阻断性错误仍需弹窗(用户必须知道) QMessageBox.critical( self.parent(), "错误", - "无法导入pipeline模块,请确保water_quality_inversion_pipeline_GUI.py文件存在!" + "无法导入 Pipeline 模块,请检查 src/core/handlers/ 目录是否完整!" ) return # ── 1) 获取 work_dir ── work_dir = self._workspace_initializer.work_dir if not work_dir: + global_event_bus.publish('LogMessage', { + 'message': '⚠ 未选择工作目录,流程中止。请先通过「工具 → 设置工作目录」选择工作目录。', + 'level': 'warning', + }) QMessageBox.warning(self.parent(), "警告", "未选择工作目录,请先设置工作目录。") return work_path = Path(work_dir) + global_event_bus.publish('LogMessage', { + 'message': f'[运行] 工作目录: {work_dir}', + 'level': 'info', + }) # ── 2) 运行前扫描 + 自动回填 ── global_event_bus.publish('LogMessage', { @@ -132,11 +170,19 @@ class PipelineExecutor(QObject): # ── 3) step3 波段越界预检 ── if not self._precheck_step3_bands(): + global_event_bus.publish('LogMessage', { + 'message': '⚠ 流程中止:step3 波段越界预检未通过(用户取消或波段配置无效)', + 'level': 'warning', + }) return # ── 4) 全流程模式选择弹窗 ── mode_dlg = PipelineModeDialog(main_window=self.parent(), parent=self.parent()) if mode_dlg.exec() != QDialog.Accepted: + global_event_bus.publish('LogMessage', { + 'message': '⚠ 流程中止:用户取消了模式选择对话框', + 'level': 'warning', + }) return selected_mode = mode_dlg.selected_mode global_event_bus.publish('LogMessage', { @@ -147,8 +193,17 @@ class PipelineExecutor(QObject): 'level': 'info', }) - # ── 5) 获取配置 ── + # ── 5) 获取配置(★ 先预加载所有面板,确保配置完整) ── + global_event_bus.publish('LogMessage', { + 'message': '[运行] 正在收集所有步骤面板的配置...', + 'level': 'info', + }) + self._panel_factory.preload_all() config = self._get_current_config() + global_event_bus.publish('LogMessage', { + 'message': f'[运行] 已收集 {len(config)} 个步骤的配置: {list(config.keys())}', + 'level': 'info', + }) # ── 6) 模式裁剪 ── if selected_mode == "prediction_only": @@ -164,9 +219,17 @@ class PipelineExecutor(QObject): skip_list: List[str] = [] if missing_items: + global_event_bus.publish('LogMessage', { + 'message': f'[预检] 发现 {len(missing_items)} 个缺失项,弹出预检对话框...', + 'level': 'warning', + }) critical_items = [it for it in missing_items if it.is_critical] if critical_items: lines = "\n".join(f" - [{it.step_name}] {it.reason}" for it in critical_items) + global_event_bus.publish('LogMessage', { + 'message': f'[预检] 阻断性错误 ({len(critical_items)} 项):\n{lines}', + 'level': 'error', + }) QMessageBox.critical( self.parent(), "预检失败(阻断性错误)", f"以下为阻断性缺失,流程无法启动:\n\n{lines}\n\n请填写后重新运行。" @@ -175,21 +238,28 @@ class PipelineExecutor(QObject): dialog = PreflightDialog(missing_items, parent=self.parent()) if dialog.exec() != QDialog.Accepted: + global_event_bus.publish('LogMessage', { + 'message': '⚠ 流程中止:用户取消了预检对话框', + 'level': 'warning', + }) return result = dialog.get_result() if result is None: + global_event_bus.publish('LogMessage', { + 'message': '⚠ 流程中止:预检对话框返回空结果', + 'level': 'warning', + }) return action, *payload = result if action == "fill": _, step_id, tab_index = result - # 发布事件:请求切换到指定 tab global_event_bus.publish('NavigateToTab', { 'tab_index': tab_index, 'step_id': step_id, }) global_event_bus.publish('LogMessage', { - 'message': f'[预检] 用户选择填写 {step_id},已切换到对应面板。', + 'message': f'[预检] 用户选择填写 {step_id},已切换到对应面板。流程暂停,填写完成后请重新运行。', 'level': 'info', }) return @@ -197,8 +267,13 @@ class PipelineExecutor(QObject): if skip_list: global_event_bus.publish('LogMessage', { 'message': f'[预检] 用户强制跳过 {len(skip_list)} 个步骤: {skip_list}', - 'level': 'info', + 'level': 'warning', }) + else: + global_event_bus.publish('LogMessage', { + 'message': '[预检] ✓ 所有必需项均已就绪,无需弹窗', + 'level': 'info', + }) # ── 8) 确认执行 ── reply = QMessageBox.question( @@ -207,6 +282,10 @@ class PipelineExecutor(QObject): QMessageBox.Yes | QMessageBox.No ) if reply != QMessageBox.Yes: + global_event_bus.publish('LogMessage', { + 'message': '⚠ 流程中止:用户取消了执行确认', + 'level': 'warning', + }) return # ── 9) 准备 worker_config ── @@ -222,6 +301,11 @@ class PipelineExecutor(QObject): if not enabled: worker_config.pop('step6_feature', None) + global_event_bus.publish('LogMessage', { + 'message': f'[运行] 最终执行配置包含 {len(worker_config)} 个步骤: {list(worker_config.keys())}', + 'level': 'info', + }) + # ── 10) 创建 WorkerThread 并连线 ── self._worker = WorkerThread(work_dir, worker_config, mode='full', skip_list=skip_list) self._worker.log_message.connect(self._on_log_message, Qt.QueuedConnection) @@ -245,17 +329,48 @@ class PipelineExecutor(QObject): step_name: 步骤名称(如 'step1', 'step5_clean') config: 步骤配置字典(可选,默认从面板获取) """ + try: + self._run_single_step_impl(step_name, config) + except Exception as e: + err_detail = traceback.format_exc() + global_event_bus.publish('LogMessage', { + 'message': f'[致命错误] run_single_step 异常: {e}', + 'level': 'error', + }) + global_event_bus.publish('LogMessage', { + 'message': f'详细追踪:\n{err_detail}', + 'level': 'error', + }) + QMessageBox.critical( + self.parent(), "运行失败", + f"启动单步执行时发生未预期的错误:\n\n{e}\n\n详细信息已输出到日志区。" + ) + + def _run_single_step_impl(self, step_name: str, config: dict = None): if not PIPELINE_AVAILABLE: + global_event_bus.publish('LogMessage', { + 'message': '无法导入 Pipeline 模块,请检查 src/core/handlers/ 目录是否完整!', + 'level': 'error', + }) QMessageBox.critical( self.parent(), "错误", - "无法导入pipeline模块,请确保water_quality_inversion_pipeline_GUI.py文件存在!" + "无法导入 Pipeline 模块,请检查 src/core/handlers/ 目录是否完整!" ) return work_dir = self._workspace_initializer.work_dir or './work_dir' if config is None: + global_event_bus.publish('LogMessage', { + 'message': '[运行] 正在收集所有步骤面板的配置...', + 'level': 'info', + }) + self._panel_factory.preload_all() config = self._get_current_config() + global_event_bus.publish('LogMessage', { + 'message': f'[运行] 已收集 {len(config)} 个步骤的配置', + 'level': 'info', + }) global_event_bus.publish('LogMessage', { 'message': f'初始化 Pipeline,工作目录: {work_dir}', @@ -295,6 +410,47 @@ class PipelineExecutor(QObject): }) global_event_bus.publish('PipelineStopped', {}) + # ═══════════════════════════════════════════════════════════ + # EventBus 订阅回调 + # ═══════════════════════════════════════════════════════════ + + def _on_request_run_single_step(self, data: dict): + """处理面板通过 EventBus 发出的单步执行请求。 + + data 格式: {'step_name': 'step1', 'config': {'step1': {...}}} + + 前置条件检查(预检/工作目录)由 run_single_step → _run_single_step_impl + 内部统一处理,此处仅做解析 + 转发 + 异常兜底。 + """ + try: + step_name = data.get('step_name') + config = data.get('config') + + if not step_name: + global_event_bus.publish('LogMessage', { + 'message': '[单步执行] 请求缺少 step_name,忽略', + 'level': 'warning', + }) + return + + global_event_bus.publish('LogMessage', { + 'message': f'[单步执行] 收到 {step_name} 的执行请求', + 'level': 'info', + }) + + self.run_single_step(step_name, config) + + except Exception as e: + err_detail = traceback.format_exc() + global_event_bus.publish('LogMessage', { + 'message': f'[致命错误] _on_request_run_single_step({step_name}) 异常: {e}', + 'level': 'error', + }) + global_event_bus.publish('LogMessage', { + 'message': f'详细追踪:\n{err_detail}', + 'level': 'error', + }) + # ═══════════════════════════════════════════════════════════ # WorkerThread 信号 → EventBus 事件(纯转发,零 UI 操作) # ═══════════════════════════════════════════════════════════ diff --git a/src/gui/core/viz_thread.py b/src/gui/core/viz_thread.py index 9dfe9b3..2abc686 100644 --- a/src/gui/core/viz_thread.py +++ b/src/gui/core/viz_thread.py @@ -178,7 +178,7 @@ class VisualizationWorkerThread(QThread): {"task": "statistics", "output_paths": output_paths} ) elif self.task == "scatter": - from src.core.water_quality_inversion_pipeline_GUI import WaterQualityInversionPipeline + from src.core.visualization.scatter_plot import generate_model_scatter_plots training_csv_path = (self.extra.get("training_csv_path") or "").strip() models_dir = (self.extra.get("models_dir") or "").strip() @@ -188,10 +188,9 @@ class VisualizationWorkerThread(QThread): if not models_dir or not Path(models_dir).is_dir(): self.failed.emit("模型目录无效或不存在,请确认步骤6已生成 7_Supervised_Model_Training 下的参数子文件夹。") return - pipeline = WaterQualityInversionPipeline(work_dir=str(wp)) - scatter_paths = pipeline.generate_model_scatter_plots( - training_csv_path=training_csv_path, + scatter_paths = generate_model_scatter_plots( models_dir=models_dir, + training_csv_path=training_csv_path, ) self.finished_ok.emit({"task": "scatter", "scatter_paths": scatter_paths or {}}) elif self.task == "generate_all_selected": @@ -205,11 +204,10 @@ class VisualizationWorkerThread(QThread): if training_csv.is_file(): models_dir = wp / "7_Supervised_Model_Training" if models_dir.is_dir() and any(d.is_dir() for d in models_dir.iterdir()): - from src.core.water_quality_inversion_pipeline_GUI import WaterQualityInversionPipeline - pipeline = WaterQualityInversionPipeline(work_dir=str(wp)) - scatter_paths = pipeline.generate_model_scatter_plots( - training_csv_path=str(training_csv), + from src.core.visualization.scatter_plot import generate_model_scatter_plots + scatter_paths = generate_model_scatter_plots( models_dir=str(models_dir), + training_csv_path=str(training_csv), ) count = len(scatter_paths) if scatter_paths else 0 parts.append(f"散点图: {count} 个") diff --git a/src/gui/core/worker_thread.py b/src/gui/core/worker_thread.py index 62da5b7..aaef421 100644 --- a/src/gui/core/worker_thread.py +++ b/src/gui/core/worker_thread.py @@ -54,16 +54,16 @@ def diagnose_pipeline_import_error(): "[INFO] PyInstaller 环境:Pipeline 从程序内置包加载,跳过对仓库路径 src/core/*.py 的磁盘检查" ) else: - pipeline_file = os.path.normpath( - os.path.join(os.path.dirname(__file__), "..", "..", "core", "water_quality_inversion_pipeline_GUI.py") + handlers_dir = os.path.normpath( + os.path.join(os.path.dirname(__file__), "..", "..", "core", "handlers") ) - if not os.path.exists(pipeline_file): - error_info.append(f"[ERROR] Pipeline文件不存在: {pipeline_file}") + if not os.path.isdir(handlers_dir): + error_info.append(f"[ERROR] Handlers 目录不存在: {handlers_dir}") error_info.append( - " 解决方案: 请确保项目结构完整,检查 src/core/ 下是否有 water_quality_inversion_pipeline_GUI.py" + " 解决方案: 请确保项目结构完整,检查 src/core/handlers/ 目录是否存在" ) else: - error_info.append(f"[OK] Pipeline文件存在: {pipeline_file}") + error_info.append(f"[OK] Handlers 目录存在: {handlers_dir}") current_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) if current_dir not in sys.path: @@ -240,24 +240,34 @@ class WorkerThread(QThread): self.log_message.emit(f" [WARNING] {message}", "warning") def run(self): - """运行 pipeline:子线程内切换 Matplotlib 为 Agg,避免 Qt5Agg 在后台线程绘图导致界面卡死。""" + """运行 pipeline:子线程内切换 Matplotlib 为 Agg,避免 Qt5Agg 在后台线程绘图导致界面卡死。 + + 终极防崩溃设计: + - 整个 run() 方法体包裹在单一 try/except 中 + - 任何未预期的异常都会被捕获并通过 finished 信号回报主线程 + - 确保前端永远不会面对"静默死亡"的后台线程 + """ import os - # GDAL 环境变量保护(放在最前面,防止路径/编码问题) os.environ['GDAL_FILENAME_IS_UTF8'] = 'YES' os.environ['SHAPE_ENCODING'] = 'UTF-8' mpl_prev = None try: - import matplotlib - mpl_prev = matplotlib.get_backend() - except Exception: - pass - try: - import matplotlib.pyplot as plt - plt.switch_backend("Agg") - except Exception: - mpl_prev = None - try: + # ★ 终端即时反馈 + print(f"\n[WorkerThread] 后台线程启动 (mode={self.mode}, work_dir={self.work_dir})") + + # ── Matplotlib 后端切换(Agg 线程安全) ── + try: + import matplotlib + mpl_prev = matplotlib.get_backend() + except Exception: + pass + try: + import matplotlib.pyplot as plt + plt.switch_backend("Agg") + except Exception: + mpl_prev = None + # ── 新架构:PipelineScheduler + Handler 注册表 ── scheduler = PipelineScheduler(work_dir=self.work_dir) scheduler.set_callback(self.pipeline_callback) @@ -267,14 +277,17 @@ class WorkerThread(QThread): if self.mode == 'full': self.log_message.emit("开始运行完整流程 (Handler 调度模式)...", "info") - # ── ★ 预检已由 GUI 层 perform_preflight() 完成,此处不再重复预检 ── - # 过滤 skip_list 中的步骤 active_config = { k: v for k, v in self.config.items() if k not in self.skip_list } + self.log_message.emit( + f"[调度] 待执行步骤 ({len(active_config)} 个): {list(active_config.keys())}", + "info" + ) + result = scheduler.run_full_pipeline(active_config) errors = result.get('errors', {}) @@ -295,16 +308,28 @@ class WorkerThread(QThread): self.progress_update.emit(100, f"步骤 {self.step_name} 执行完成") self.finished.emit(True, f"步骤 {self.step_name} 独立运行成功!") + except PipelineHalt as exc: # 预检失败 / 硬终止:透传清晰错误信息,不打印完整 traceback error_msg = str(exc) self.log_message.emit(f"[预检失败] {error_msg}", "error") self.finished.emit(False, error_msg) + except Exception as e: - error_msg = f"执行失败: {str(e)}\n{traceback.format_exc()}" - self.log_message.emit(error_msg, "error") - self.finished.emit(False, error_msg) + # ★ 终极捕获:任何未预期的异常都会被完整回报 + full_tb = traceback.format_exc() + self.log_message.emit(f"[致命错误] 后台线程崩溃: {e}", "error") + self.log_message.emit(f"详细追踪:\n{full_tb}", "error") + # 同时 print 到终端(确保即使信号失效也能看到) + print(f"\n{'='*60}") + print(f"[WorkerThread 崩溃] {e}") + print(f"{'='*60}") + print(full_tb) + print(f"{'='*60}\n") + self.finished.emit(False, f"后台线程崩溃: {e}\n\n{full_tb}") + finally: + # ── 恢复 Matplotlib 后端 ── if mpl_prev: try: import matplotlib.pyplot as plt diff --git a/src/gui/panels/step10_watercolor_panel.py b/src/gui/panels/step10_watercolor_panel.py index 615c27b..580cbdb 100644 --- a/src/gui/panels/step10_watercolor_panel.py +++ b/src/gui/panels/step10_watercolor_panel.py @@ -243,7 +243,7 @@ class Step10WatercolorPanel(QWidget): self.run_btn = QPushButton("▶ 执行水色指数反演") self.run_btn.setStyleSheet(ModernStylesheet.get_button_stylesheet('success')) - self.run_btn.clicked.connect(self.run_step) + self.run_btn.clicked.connect(self._on_run_single_clicked) layout.addWidget(self.run_btn) layout.addStretch() @@ -484,7 +484,54 @@ class Step10WatercolorPanel(QWidget): if not self.output_dir.get_path(): self.output_dir.set_path(out_dir) + def _on_run_single_clicked(self): + """通过 EventBus 发布单步执行请求(解耦面板与 PipelineExecutor)。""" + from src.gui.core.event_bus import global_event_bus + + bsq_path = self.bsq_file.get_path().strip() + hdr_path = self.hdr_file.get_path().strip() + output_dir = self.output_dir.get_path().strip() + + if not bsq_path: + QMessageBox.warning(self, "输入错误", "请选择去耀斑 BSQ 影像!") + return + if not Path(bsq_path).exists(): + QMessageBox.warning(self, "输入错误", f"BSQ 影像不存在:\n{bsq_path}") + return + if not hdr_path: + auto_hdr = Path(bsq_path).with_suffix('.hdr') + if auto_hdr.exists(): + hdr_path = str(auto_hdr) + self.hdr_file.set_path(hdr_path) + else: + QMessageBox.warning(self, "输入错误", "请选择 ENVI 头文件!") + return + if not Path(hdr_path).exists(): + QMessageBox.warning(self, "输入错误", f"HDR 文件不存在:\n{hdr_path}") + return + if not output_dir: + work_dir = self._get_default_work_dir() + output_dir = resolve_subdir(work_dir, 'watercolor') + os.makedirs(output_dir, exist_ok=True) + self.output_dir.set_path(output_dir) + + selected = self._get_selected_formula_names() + if not selected: + QMessageBox.warning(self, "输入错误", "请至少选择一个公式!") + return + + if self._waterindex_csv and not Path(self._waterindex_csv).exists(): + QMessageBox.warning(self, "配置错误", f"waterindex.csv 不存在:\n{self._waterindex_csv}") + return + + config = {'step10': self.get_config()} + global_event_bus.publish('RequestRunSingleStep', { + 'step_name': 'step10', + 'config': config, + }) + def run_step(self): + """独立运行步骤10(旧版 parent 链上溯方式,保留兼容)。""" bsq_path = self.bsq_file.get_path().strip() hdr_path = self.hdr_file.get_path().strip() output_dir = self.output_dir.get_path().strip() diff --git a/src/gui/panels/step11_map_panel.py b/src/gui/panels/step11_map_panel.py index 949be07..6d454b6 100644 --- a/src/gui/panels/step11_map_panel.py +++ b/src/gui/panels/step11_map_panel.py @@ -27,12 +27,7 @@ from PyQt5.QtWidgets import ( from src.gui.components.custom_widgets import FileSelectWidget from src.gui.styles import ModernStylesheet -# Pipeline 可用性(与 core/worker_thread.py 保持一致) -try: - from src.core.water_quality_inversion_pipeline_GUI import WaterQualityInversionPipeline - PIPELINE_AVAILABLE = True -except ImportError: - PIPELINE_AVAILABLE = False +PIPELINE_AVAILABLE = True class Step11MapBatchThread(QThread): @@ -63,19 +58,19 @@ class Step11MapBatchThread(QThread): except Exception: mpl_prev = None try: - from src.core.water_quality_inversion_pipeline_GUI import WaterQualityInversionPipeline - pipeline = WaterQualityInversionPipeline(work_dir=self.work_dir) + from src.core.steps.mapping_step import MappingStep n = len(self.csv_paths) for i, csv_p in enumerate(self.csv_paths): self.progress.emit(i + 1, n) self.log_message.emit(f"专题图 [{i + 1}/{n}] {csv_p}", "info") - kw = {**self.step10_kwargs, "prediction_csv_path": csv_p, "skip_dependency_check": True} + kw = {**self.step10_kwargs, "prediction_csv_path": csv_p} + kw.pop("skip_dependency_check", None) if self.output_dir_optional: stem = Path(csv_p).stem kw["output_image_path"] = str(Path(self.output_dir_optional) / f"{stem}_distribution.png") else: kw["output_image_path"] = None - pipeline.step10_map(**kw) + MappingStep.generate_distribution_map(**kw) self.finished_ok.emit(n) except Exception as e: self.failed.emit(f"{e}\n{traceback.format_exc()}") diff --git a/src/gui/panels/step12_viz_panel.py b/src/gui/panels/step12_viz_panel.py index cd60e1a..7266989 100644 --- a/src/gui/panels/step12_viz_panel.py +++ b/src/gui/panels/step12_viz_panel.py @@ -32,12 +32,7 @@ from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas from matplotlib.backends.backend_qt5agg import NavigationToolbar2QT as NavigationToolbar from matplotlib.figure import Figure -# Pipeline 可用性(与 core/worker_thread.py 保持一致) -try: - from src.core.water_quality_inversion_pipeline_GUI import WaterQualityInversionPipeline - PIPELINE_AVAILABLE = True -except ImportError: - PIPELINE_AVAILABLE = False +PIPELINE_AVAILABLE = True def _viz_training_spectra_csv_path(work_path: Path) -> Path: @@ -208,7 +203,7 @@ class VisualizationWorkerThread(QThread): {"task": "statistics", "output_paths": output_paths} ) elif self.task == "scatter": - from src.core.water_quality_inversion_pipeline_GUI import WaterQualityInversionPipeline + from src.core.visualization.scatter_plot import generate_model_scatter_plots training_csv_path = (self.extra.get("training_csv_path") or "").strip() models_dir = (self.extra.get("models_dir") or "").strip() @@ -218,10 +213,9 @@ class VisualizationWorkerThread(QThread): if not models_dir or not Path(models_dir).is_dir(): self.failed.emit("模型目录无效或不存在,请确认步骤6已生成 7_Supervised_Model_Training 下的参数子文件夹。") return - pipeline = WaterQualityInversionPipeline(work_dir=str(wp)) - scatter_paths = pipeline.generate_model_scatter_plots( - training_csv_path=training_csv_path, + scatter_paths = generate_model_scatter_plots( models_dir=models_dir, + training_csv_path=training_csv_path, ) self.finished_ok.emit({"task": "scatter", "scatter_paths": scatter_paths or {}}) elif self.task == "generate_all_selected": @@ -235,11 +229,10 @@ class VisualizationWorkerThread(QThread): if training_csv.is_file(): models_dir = wp / "7_Supervised_Model_Training" if models_dir.is_dir() and any(d.is_dir() for d in models_dir.iterdir()): - from src.core.water_quality_inversion_pipeline_GUI import WaterQualityInversionPipeline - pipeline = WaterQualityInversionPipeline(work_dir=str(wp)) - scatter_paths = pipeline.generate_model_scatter_plots( - training_csv_path=str(training_csv), + from src.core.visualization.scatter_plot import generate_model_scatter_plots + scatter_paths = generate_model_scatter_plots( models_dir=str(models_dir), + training_csv_path=str(training_csv), ) count = len(scatter_paths) if scatter_paths else 0 parts.append(f"散点图: {count} 个") diff --git a/src/gui/panels/step14_panel.py b/src/gui/panels/step14_panel.py index cb9a848..9a8743f 100644 --- a/src/gui/panels/step14_panel.py +++ b/src/gui/panels/step14_panel.py @@ -27,12 +27,7 @@ from PyQt5.QtWidgets import ( from src.gui.components.custom_widgets import FileSelectWidget from src.gui.styles import ModernStylesheet -# Pipeline 可用性(与 core/worker_thread.py 保持一致) -try: - from src.core.water_quality_inversion_pipeline_GUI import WaterQualityInversionPipeline - PIPELINE_AVAILABLE = True -except ImportError: - PIPELINE_AVAILABLE = False +PIPELINE_AVAILABLE = True class Step14BatchThread(QThread): @@ -63,19 +58,19 @@ class Step14BatchThread(QThread): except Exception: mpl_prev = None try: - from src.core.water_quality_inversion_pipeline_GUI import WaterQualityInversionPipeline - pipeline = WaterQualityInversionPipeline(work_dir=self.work_dir) + from src.core.steps.mapping_step import MappingStep n = len(self.csv_paths) for i, csv_p in enumerate(self.csv_paths): self.progress.emit(i + 1, n) self.log_message.emit(f"专题图 [{i + 1}/{n}] {csv_p}", "info") - kw = {**self.step14_kwargs, "prediction_csv_path": csv_p, "skip_dependency_check": True} + kw = {**self.step14_kwargs, "prediction_csv_path": csv_p} + kw.pop("skip_dependency_check", None) if self.output_dir_optional: stem = Path(csv_p).stem kw["output_image_path"] = str(Path(self.output_dir_optional) / f"{stem}_distribution.png") else: kw["output_image_path"] = None - pipeline.step10_map(**kw) + MappingStep.generate_distribution_map(**kw) self.finished_ok.emit(n) except Exception as e: self.failed.emit(f"{e}\n{traceback.format_exc()}") diff --git a/src/gui/panels/step1_panel.py b/src/gui/panels/step1_panel.py index 1a110c9..1c616bf 100644 --- a/src/gui/panels/step1_panel.py +++ b/src/gui/panels/step1_panel.py @@ -144,7 +144,7 @@ class Step1Panel(QWidget): # 独立运行按钮 self.run_btn = QPushButton("独立运行此步骤") self.run_btn.setStyleSheet(ModernStylesheet.get_button_stylesheet('success')) - self.run_btn.clicked.connect(self.run_step) + self.run_btn.clicked.connect(self._on_run_single_clicked) layout.addWidget(self.run_btn) # 连接信号 @@ -257,8 +257,40 @@ class Step1Panel(QWidget): self.update_ui_state() + def _on_run_single_clicked(self): + """通过 EventBus 发布单步执行请求(解耦面板与 PipelineExecutor)。 + + 替代旧有的 parent 链上溯查找 run_single_step 的紧耦合方式。 + PipelineExecutor 在 __init__ 中订阅 RequestRunSingleStep 事件, + 收到后调用 run_single_step(step_name, config) 统一处理预检/工作目录/执行。 + """ + from src.gui.core.event_bus import global_event_bus + + # 验证输入(与旧 run_step 逻辑一致) + if self.use_ndwi_radio.isChecked(): + img_path = self.img_file.get_path() + if not img_path: + QMessageBox.warning(self, "输入错误", "请选择参考影像文件!") + return + else: + mask_path = self.mask_file.get_path() + if not mask_path: + QMessageBox.warning(self, "输入错误", "请选择掩膜文件!") + return + if mask_path.lower().endswith('.shp'): + img_path = self.img_file.get_path() + if not img_path: + QMessageBox.warning(self, "输入错误", "当使用shp文件时,需要提供参考影像用于栅格化!") + return + + config = {'step1': self.get_config()} + global_event_bus.publish('RequestRunSingleStep', { + 'step_name': 'step1', + 'config': config, + }) + def run_step(self): - """独立运行步骤1""" + """独立运行步骤1(旧版 parent 链上溯方式,保留兼容)。""" # 验证输入 if self.use_ndwi_radio.isChecked(): # NDWI模式:需要影像文件 diff --git a/src/gui/panels/step2_panel.py b/src/gui/panels/step2_panel.py index 7ececfe..6c54714 100644 --- a/src/gui/panels/step2_panel.py +++ b/src/gui/panels/step2_panel.py @@ -108,7 +108,7 @@ class Step2Panel(QWidget): # 独立运行按钮 self.run_btn = QPushButton("独立运行此步骤") self.run_btn.setStyleSheet(ModernStylesheet.get_button_stylesheet('success')) - self.run_btn.clicked.connect(self.run_step) + self.run_btn.clicked.connect(self._on_run_single_clicked) layout.addWidget(self.run_btn) layout.addStretch() @@ -203,8 +203,23 @@ class Step2Panel(QWidget): # 没有工作目录时,清空输出路径 self.output_file.set_path("") + def _on_run_single_clicked(self): + """通过 EventBus 发布单步执行请求(解耦面板与 PipelineExecutor)。""" + from src.gui.core.event_bus import global_event_bus + + img_path = self.img_file.get_path() + if not img_path: + QMessageBox.warning(self, "输入错误", "请选择影像文件!") + return + + config = {'step2': self.get_config()} + global_event_bus.publish('RequestRunSingleStep', { + 'step_name': 'step2', + 'config': config, + }) + def run_step(self): - """独立运行步骤2""" + """独立运行步骤2(旧版 parent 链上溯方式,保留兼容)。""" # 验证输入 img_path = self.img_file.get_path() if not img_path: diff --git a/src/gui/panels/step3_panel.py b/src/gui/panels/step3_panel.py index 48bc54a..c7bfdd9 100644 --- a/src/gui/panels/step3_panel.py +++ b/src/gui/panels/step3_panel.py @@ -228,7 +228,7 @@ class Step3Panel(QWidget): # 独立运行按钮 self.run_btn = QPushButton("独立运行此步骤") self.run_btn.setStyleSheet(ModernStylesheet.get_button_stylesheet('success')) - self.run_btn.clicked.connect(self.run_step) + self.run_btn.clicked.connect(self._on_run_single_clicked) layout.addWidget(self.run_btn) layout.addStretch() @@ -433,8 +433,34 @@ class Step3Panel(QWidget): if 'sugar_bounds' in config: self.sugar_bounds.setText(str(config['sugar_bounds'])) + def _on_run_single_clicked(self): + """通过 EventBus 发布单步执行请求(解耦面板与 PipelineExecutor)。""" + from src.gui.core.event_bus import global_event_bus + + img_path = self.img_file.get_path() + if not img_path: + QMessageBox.warning(self, "输入错误", "请选择影像文件!") + return + if self.enable_checkbox.isChecked(): + water_mask_path = self.water_mask_file.get_path() + if not water_mask_path: + QMessageBox.warning( + self, + "输入错误", + "独立运行耀斑去除时,必须选择水域掩膜或边界文件。\n\n" + "请提供与当前影像空间一致的水域栅格掩膜(.dat/.tif),或水域矢量边界(.shp)。\n" + "若刚跑过完整流程,可使用步骤1生成的水域掩膜文件。", + ) + return + + config = {'step3': self.get_config()} + global_event_bus.publish('RequestRunSingleStep', { + 'step_name': 'step3', + 'config': config, + }) + def run_step(self): - """独立运行步骤3""" + """独立运行步骤3(旧版 parent 链上溯方式,保留兼容)。""" # 验证输入 img_path = self.img_file.get_path() if not img_path: diff --git a/src/gui/panels/step4_sampling_panel.py b/src/gui/panels/step4_sampling_panel.py index 542d6bf..45fc0cf 100644 --- a/src/gui/panels/step4_sampling_panel.py +++ b/src/gui/panels/step4_sampling_panel.py @@ -91,7 +91,7 @@ class Step4SamplingPanel(QWidget): # 独立运行按钮 self.run_btn = QPushButton("独立运行此步骤") self.run_btn.setStyleSheet(ModernStylesheet.get_button_stylesheet('success')) - self.run_btn.clicked.connect(self.run_step) + self.run_btn.clicked.connect(self._on_run_single_clicked) layout.addWidget(self.run_btn) # 交互式预览按钮 @@ -228,8 +228,23 @@ class Step4SamplingPanel(QWidget): # 4. 同步更新预览按钮状态(路径可能已自动填充) self._check_csv_exists() + def _on_run_single_clicked(self): + """通过 EventBus 发布单步执行请求(解耦面板与 PipelineExecutor)。""" + from src.gui.core.event_bus import global_event_bus + + deglint_img_path = self.deglint_img_file.get_path() + if not deglint_img_path: + QMessageBox.warning(self, "输入错误", "请选择去耀斑影像文件!") + return + + config = {'step4_sampling': self.get_config()} + global_event_bus.publish('RequestRunSingleStep', { + 'step_name': 'step4_sampling', + 'config': config, + }) + def run_step(self): - """独立运行步骤4""" + """独立运行步骤4(旧版 parent 链上溯方式,保留兼容)。""" deglint_img_path = self.deglint_img_file.get_path() if not deglint_img_path: QMessageBox.warning(self, "输入错误", "请选择去耀斑影像文件!") diff --git a/src/gui/panels/step5_clean_panel.py b/src/gui/panels/step5_clean_panel.py index 229f672..a04e63b 100644 --- a/src/gui/panels/step5_clean_panel.py +++ b/src/gui/panels/step5_clean_panel.py @@ -95,7 +95,7 @@ class Step5CleanPanel(QWidget): # 独立运行按钮 self.run_btn = QPushButton("独立运行此步骤") self.run_btn.setStyleSheet(ModernStylesheet.get_button_stylesheet('success')) - self.run_btn.clicked.connect(self.run_step) + self.run_btn.clicked.connect(self._on_run_single_clicked) layout.addWidget(self.run_btn) layout.addStretch() @@ -142,8 +142,23 @@ class Step5CleanPanel(QWidget): else: self.output_file.set_path("") + def _on_run_single_clicked(self): + """通过 EventBus 发布单步执行请求(解耦面板与 PipelineExecutor)。""" + from src.gui.core.event_bus import global_event_bus + + csv_path = self.csv_file.get_path() + if not csv_path: + QMessageBox.warning(self, "输入错误", "请选择水质参数文件!") + return + + config = {'step5_clean': self.get_config()} + global_event_bus.publish('RequestRunSingleStep', { + 'step_name': 'step5_clean', + 'config': config, + }) + def run_step(self): - """独立运行步骤5""" + """独立运行步骤5(旧版 parent 链上溯方式,保留兼容)。""" csv_path = self.csv_file.get_path() if not csv_path: QMessageBox.warning(self, "输入错误", "请选择水质参数文件!") diff --git a/src/gui/panels/step6_feature_panel.py b/src/gui/panels/step6_feature_panel.py index 5649f8d..6f0a690 100644 --- a/src/gui/panels/step6_feature_panel.py +++ b/src/gui/panels/step6_feature_panel.py @@ -106,7 +106,7 @@ class Step6FeaturePanel(QWidget): # 独立运行按钮 self.run_btn = QPushButton("独立运行此步骤") self.run_btn.setStyleSheet(ModernStylesheet.get_button_stylesheet('success')) - self.run_btn.clicked.connect(self.run_step) + self.run_btn.clicked.connect(self._on_run_single_clicked) layout.addWidget(self.run_btn) layout.addStretch() @@ -258,8 +258,35 @@ class Step6FeaturePanel(QWidget): if not existing_csv or not existing_csv.strip(): self.csv_file.set_path(step5_clean_output_path) + def _on_run_single_clicked(self): + """通过 EventBus 发布单步执行请求(解耦面板与 PipelineExecutor)。""" + from src.gui.core.event_bus import global_event_bus + + deglint_img_path = self.deglint_img_file.get_path() + csv_path = self.csv_file.get_path() + if not deglint_img_path: + QMessageBox.warning(self, "输入错误", "请选择去耀斑影像文件!") + return + if not csv_path: + QMessageBox.warning(self, "输入错误", "请选择处理后的CSV文件!") + return + if not self.glint_mask_file.get_path(): + QMessageBox.warning( + self, + "输入错误", + "独立运行光谱特征提取时,必须选择耀斑掩膜文件。\n\n" + "请提供与去耀斑影像对应的耀斑二值掩膜(一般为步骤2输出的 severe_glint_area.dat)。", + ) + return + + config = {'step6_feature': self.get_config()} + global_event_bus.publish('RequestRunSingleStep', { + 'step_name': 'step6_feature', + 'config': config, + }) + def run_step(self): - """独立运行步骤6""" + """独立运行步骤6(旧版 parent 链上溯方式,保留兼容)。""" # 验证输入 deglint_img_path = self.deglint_img_file.get_path() csv_path = self.csv_file.get_path() diff --git a/src/gui/panels/step8_ml_train_panel.py b/src/gui/panels/step8_ml_train_panel.py index 2004391..d410b1e 100644 --- a/src/gui/panels/step8_ml_train_panel.py +++ b/src/gui/panels/step8_ml_train_panel.py @@ -119,7 +119,7 @@ class Step8MlTrainPanel(QWidget): # 独立运行按钮 self.run_btn = QPushButton("独立运行此步骤") self.run_btn.setStyleSheet(ModernStylesheet.get_button_stylesheet('success')) - self.run_btn.clicked.connect(self.run_step) + self.run_btn.clicked.connect(self._on_run_single_clicked) layout.addWidget(self.run_btn) layout.addStretch() @@ -398,8 +398,23 @@ class Step8MlTrainPanel(QWidget): else: self.output_path.set_path("") + def _on_run_single_clicked(self): + """通过 EventBus 发布单步执行请求(解耦面板与 PipelineExecutor)。""" + from src.gui.core.event_bus import global_event_bus + + training_csv_path = self.training_csv_file.get_path() + if not training_csv_path: + QMessageBox.warning(self, "输入错误", "请选择训练数据CSV文件!") + return + + config = {'step8_ml_train': self.get_config()} + global_event_bus.publish('RequestRunSingleStep', { + 'step_name': 'step8_ml_train', + 'config': config, + }) + def run_step(self): - """独立运行步骤8""" + """独立运行步骤8(旧版 parent 链上溯方式,保留兼容)。""" training_csv_path = self.training_csv_file.get_path() if not training_csv_path: QMessageBox.warning(self, "输入错误", "请选择训练数据CSV文件!") diff --git a/src/gui/panels/step8_qaa_panel.py b/src/gui/panels/step8_qaa_panel.py index 6edbfc8..47667f0 100644 --- a/src/gui/panels/step8_qaa_panel.py +++ b/src/gui/panels/step8_qaa_panel.py @@ -109,7 +109,7 @@ class Step8QAAPanel(QWidget): # 独立运行按钮 self.run_btn = QPushButton("执行 QAA 反演") self.run_btn.setStyleSheet(ModernStylesheet.get_button_stylesheet('success')) - self.run_btn.clicked.connect(self.run_step) + self.run_btn.clicked.connect(self._on_run_single_clicked) layout.addWidget(self.run_btn) layout.addStretch() @@ -212,8 +212,23 @@ class Step8QAAPanel(QWidget): else: self.output_path.set_path("") + def _on_run_single_clicked(self): + """通过 EventBus 发布单步执行请求(解耦面板与 PipelineExecutor)。""" + from src.gui.core.event_bus import global_event_bus + + spectrum_path = self.spectrum_csv_file.get_path() + if not spectrum_path: + QMessageBox.warning(self, "输入错误", "请选择光谱 CSV 文件!") + return + + config = {'step8_qaa': self.get_config()} + global_event_bus.publish('RequestRunSingleStep', { + 'step_name': 'step8_qaa', + 'config': config, + }) + def run_step(self): - """独立运行 QAA 反演""" + """独立运行 QAA 反演(旧版 parent 链上溯方式,保留兼容)。""" spectrum_path = self.spectrum_csv_file.get_path() if not spectrum_path: QMessageBox.warning(self, "输入错误", "请选择光谱 CSV 文件!") diff --git a/src/gui/panels/step9_ml_predict_panel.py b/src/gui/panels/step9_ml_predict_panel.py index 81c45e2..8f05f83 100644 --- a/src/gui/panels/step9_ml_predict_panel.py +++ b/src/gui/panels/step9_ml_predict_panel.py @@ -175,7 +175,7 @@ class Step9MlPredictPanel(QWidget): # 独立运行按钮 self.run_btn = QPushButton("独立运行此步骤") self.run_btn.setStyleSheet(ModernStylesheet.get_button_stylesheet('success')) - self.run_btn.clicked.connect(self.run_step) + self.run_btn.clicked.connect(self._on_run_single_clicked) layout.addWidget(self.run_btn) layout.addStretch() @@ -414,8 +414,57 @@ class Step9MlPredictPanel(QWidget): if 'output_path' in config: self.output_file.set_path(config['output_path']) + def _on_run_single_clicked(self): + """通过 EventBus 发布单步执行请求(解耦面板与 PipelineExecutor)。""" + from src.gui.core.event_bus import global_event_bus + + sampling_csv_path = self.sampling_csv_file.get_path() + if not sampling_csv_path: + QMessageBox.warning(self, "输入错误", "请选择采样光谱CSV文件!") + return + + # 外部模型优先:用户选择了"导入本地预训练模型" + if self.use_external_model.isChecked(): + if not self.external_models_dict: + QMessageBox.warning( + self, + "模型未加载", + "请先点击「浏览...」按钮选择模型母文件夹!", + ) + return + checked_dict = self._get_checked_models_dict() + if not checked_dict: + QMessageBox.warning( + self, + "未选择模型", + "请至少勾选一个模型参与预测!", + ) + return + config = { + 'step9_ml_predict': self.get_config(), + '_external_models_dict': checked_dict, + '_external_model_dir': self.external_model_dir, + } + global_event_bus.publish('RequestRunSingleStep', { + 'step_name': 'step9_ml_predict', + 'config': config, + }) + return + + # 默认流程:使用模型目录 + models_dir = self.models_dir_file.get_path() + if not models_dir: + QMessageBox.warning(self, "输入错误", "请选择模型目录!") + return + + config = {'step9_ml_predict': self.get_config()} + global_event_bus.publish('RequestRunSingleStep', { + 'step_name': 'step9_ml_predict', + 'config': config, + }) + def run_step(self): - """独立运行步骤11""" + """独立运行步骤11(旧版 parent 链上溯方式,保留兼容)。""" sampling_csv_path = self.sampling_csv_file.get_path() if not sampling_csv_path: QMessageBox.warning(self, "输入错误", "请选择采样光谱CSV文件!") diff --git a/src/gui/water_quality_gui.py b/src/gui/water_quality_gui.py index c55dac0..bc55e22 100644 --- a/src/gui/water_quality_gui.py +++ b/src/gui/water_quality_gui.py @@ -1244,7 +1244,7 @@ class WaterQualityGUI(QMainWindow): if not PIPELINE_AVAILABLE: QMessageBox.critical( self, "错误", - "无法导入pipeline模块,请确保water_quality_inversion_pipeline_GUI.py文件存在!" + "无法导入 Pipeline 模块,请检查 src/core/handlers/ 目录是否完整!" ) return @@ -1400,7 +1400,7 @@ class WaterQualityGUI(QMainWindow): if not PIPELINE_AVAILABLE: QMessageBox.critical( self, "错误", - "无法导入pipeline模块,请确保water_quality_inversion_pipeline_GUI.py文件存在!" + "无法导入 Pipeline 模块,请检查 src/core/handlers/ 目录是否完整!" ) return