From f73a7d89990941c6908d30106bc4f2e4d7c8050a Mon Sep 17 00:00:00 2001 From: DXC Date: Fri, 12 Jun 2026 16:48:20 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=85=AC=E5=BC=8F=E6=96=B9?= =?UTF-8?q?=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../waterindex_inversion/__init__.py | 6 +- src/core/glint_removal/get_spectral.py | 54 +++++++++++++- src/gui/panels/step11_map_panel.py | 26 +++---- src/gui/panels/step14_panel.py | 26 +++---- src/gui/water_quality_gui.py | 32 ++++---- .../process_water_quality_data.py | 74 +++++++++++++++---- tmp_watercolor_rescue.py | 6 +- 7 files changed, 162 insertions(+), 62 deletions(-) diff --git a/src/core/algorithms/waterindex_inversion/__init__.py b/src/core/algorithms/waterindex_inversion/__init__.py index f58eae9..eb41cb9 100644 --- a/src/core/algorithms/waterindex_inversion/__init__.py +++ b/src/core/algorithms/waterindex_inversion/__init__.py @@ -348,7 +348,7 @@ class WaterIndexProcessor: hdr_path : str, optional ENVI HDR 文件路径(None → 自动构造) output_dir : str, optional - 输出目录(None → 与 bsq_path 同目录下的 8_WaterIndex_Images/) + 输出目录(None → 与 bsq_path 同目录下的 10_WaterIndex_Images/) formula_names : list, optional 要处理的公式名列表(None → 处理全部) water_mask : np.ndarray, optional @@ -374,7 +374,7 @@ class WaterIndexProcessor: # ── 自动构造输出目录 ──────────────────────────────────────────── if output_dir is None: - output_dir = os.path.join(os.path.dirname(bsq_path), '8_WaterIndex_Images') + output_dir = os.path.join(os.path.dirname(bsq_path), '10_WaterIndex_Images') os.makedirs(output_dir, exist_ok=True) def progress(msg: str, pct: float): @@ -610,7 +610,7 @@ class WaterIndexProcessor: if os.path.isfile(hdr_path_alt): hdr_path = hdr_path_alt - output_dir = os.path.join(work_dir, "8_WaterIndex_Images") + output_dir = os.path.join(work_dir, "10_WaterIndex_Images") # ── 加载水域掩膜(可选)─────────────────────────────────────── water_mask: Optional[np.ndarray] = None diff --git a/src/core/glint_removal/get_spectral.py b/src/core/glint_removal/get_spectral.py index cf498fd..1b81151 100644 --- a/src/core/glint_removal/get_spectral.py +++ b/src/core/glint_removal/get_spectral.py @@ -2,6 +2,7 @@ from osgeo import gdal, osr import numpy as np import pandas as pd import os +import re import spectral from math import sin, cos, tan, sqrt, radians @@ -473,9 +474,56 @@ def get_spectral_in_coor(imgpath, coorpath, outpath, radius=0, flare_path=None, for i in range(min(3, coor_data.shape[0])): print(f" 行{i + 1}: {coor_data[i, :min(5, coor_data.shape[1])]}") # 只显示前5列 - # 提取原始坐标 - lat_array = coor_data[:, 1] # 第2列是纬度(跳过测量点ID列) - lon_array = coor_data[:, 2] # 第3列是经度 + # 提取原始坐标(使用智能坐标列检测) + lon_patterns = [ + r'^lon', r'^lng', r'^longitude', r'经度', r'^x$', r'^utm_x$', r'^pixel_x$' + ] + lat_patterns = [ + r'^lat', r'^latitude', r'纬度', r'^y$', r'^utm_y$', r'^pixel_y$' + ] + + x_col_name, y_col_name = None, None + + if coor_df is not None and hasattr(coor_df, 'columns'): + for col in coor_df.columns: + col_str = str(col).lower().strip() + if x_col_name is None and any(re.search(p, col_str) for p in lon_patterns): + x_col_name = col + if y_col_name is None and any(re.search(p, col_str) for p in lat_patterns): + y_col_name = col + + if x_col_name and y_col_name and x_col_name in coor_df.columns and y_col_name in coor_df.columns: + lon_array = coor_df[x_col_name].values + lat_array = coor_df[y_col_name].values + print(f"💡 坐标列名检测: X/经度=[{x_col_name}], Y/纬度=[{y_col_name}]") + else: + numeric_cols = coor_df.select_dtypes(include=[np.number]).columns.tolist() if coor_df is not None else [] + if len(numeric_cols) >= 2: + col1, col2 = numeric_cols[0], numeric_cols[1] + mean1 = coor_df[col1].head(10).mean() + mean2 = coor_df[col2].head(10).mean() + if abs(mean1) <= 90 and abs(mean2) > 90: + y_col_name, x_col_name = col1, col2 + lon_array = coor_df[x_col_name].values + lat_array = coor_df[y_col_name].values + elif abs(mean2) <= 90 and abs(mean1) > 90: + x_col_name, y_col_name = col1, col2 + lon_array = coor_df[x_col_name].values + lat_array = coor_df[y_col_name].values + else: + if mean1 > mean2: + x_col_name, y_col_name = col1, col2 + else: + x_col_name, y_col_name = col2, col1 + lon_array = coor_df[x_col_name].values + lat_array = coor_df[y_col_name].values + print(f"💡 触发智能数值推断坐标列: X/经度=[{x_col_name}], Y/纬度=[{y_col_name}]") + else: + if coor_data is not None and coor_data.shape[1] >= 3: + lat_array = coor_data[:, 1] + lon_array = coor_data[:, 2] + else: + raise Exception("坐标文件格式错误:需要至少2列数据,且最好包含坐标列名(如lon/lat/经度/纬度)") print(f"\n=== 原始坐标信息 ===") print(f"原始坐标范围: 经度 {np.min(lon_array):.6f} ~ {np.max(lon_array):.6f}, 纬度 {np.min(lat_array):.6f} ~ {np.max(lat_array):.6f}") diff --git a/src/gui/panels/step11_map_panel.py b/src/gui/panels/step11_map_panel.py index 014a105..675d154 100644 --- a/src/gui/panels/step11_map_panel.py +++ b/src/gui/panels/step11_map_panel.py @@ -253,7 +253,7 @@ class Step11MapPanel(QWidget): self.geotiff_dir_label = QLabel("水色指数目录:") self.geotiff_dir_label.setMinimumWidth(120) self.geotiff_dir_edit = QLineEdit() - self.geotiff_dir_edit.setPlaceholderText("选择 8_WaterIndex_Images 文件夹(批量渲染)…") + self.geotiff_dir_edit.setPlaceholderText("选择 10_WaterIndex_Images 文件夹(批量渲染)…") geotiff_dir_btn = QPushButton("浏览…") geotiff_dir_btn.setMaximumWidth(80) geotiff_dir_btn.clicked.connect(self.browse_geotiff_dir) @@ -392,7 +392,7 @@ class Step11MapPanel(QWidget): """浏览 GeoTIFF 文件夹(批量模式)""" default = self._get_default_work_dir() if default: - default = os.path.join(default, "8_WaterIndex_Images") + default = os.path.join(default, "10_WaterIndex_Images") d = QFileDialog.getExistingDirectory( self, "选择水色指数 GeoTIFF 文件夹", default ) @@ -514,18 +514,18 @@ class Step11MapPanel(QWidget): pred_dir = None if hasattr(main_window, 'step11_prediction_panel'): step8_widget = getattr(main_window.step11_prediction_panel, 'output_file', None) - step8_output = "" + step10_output = "" if hasattr(step8_widget, 'get_path'): - step8_output = step8_widget.get_path() or "" + step10_output = step8_widget.get_path() or "" elif hasattr(step8_widget, 'text'): - step8_output = step8_widget.text() or "" + step10_output = step8_widget.text() or "" - if step8_output: + if step10_output: # 若为相对路径,使用 work_dir 合成为绝对路径 - if not os.path.isabs(step8_output): - step8_output = os.path.join(self.work_dir or '', step8_output).replace('\\', '/') + if not os.path.isabs(step10_output): + step10_output = os.path.join(self.work_dir or '', step10_output).replace('\\', '/') # 提取父目录后追加 Machine_Learning_Prediction(最底层真实子目录) - base_pred_dir = str(Path(step8_output).parent) + base_pred_dir = str(Path(step10_output).parent) ml_pred_dir = Path(base_pred_dir) / "Machine_Learning_Prediction" pred_dir = str(ml_pred_dir) if ml_pred_dir.exists() else base_pred_dir @@ -594,13 +594,13 @@ class Step11MapPanel(QWidget): print("⚠️ 提示:专题图生成模块需传入标准矢量边界文件 (.shp),请手动选择。") # 6. 自动探测 Step 8 输出的水色指数 GeoTIFF(GeoTIFF 渲染模式) - step8_out_dir = Path(self.work_dir) / "8_WaterIndex_Images" if self.work_dir else None - if step8_out_dir and step8_out_dir.is_dir(): + step10_out_dir = Path(self.work_dir) / "10_WaterIndex_Images" if self.work_dir else None + if step10_out_dir and step10_out_dir.is_dir(): # GeoTIFF 批量模式:填充目录供批量渲染 if not (self.geotiff_dir_edit.text() or "").strip(): - self.geotiff_dir_edit.setText(str(step8_out_dir)) + self.geotiff_dir_edit.setText(str(step10_out_dir)) # GeoTIFF 单文件模式:默认选中第一个 - tif_files = sorted(step8_out_dir.glob("*.tif")) + tif_files = sorted(step10_out_dir.glob("*.tif")) if tif_files and not (self.geotiff_file.get_path() or "").strip(): self.geotiff_file.set_path(str(tif_files[0])) except Exception as e: diff --git a/src/gui/panels/step14_panel.py b/src/gui/panels/step14_panel.py index d94529d..2c58486 100644 --- a/src/gui/panels/step14_panel.py +++ b/src/gui/panels/step14_panel.py @@ -253,7 +253,7 @@ class Step14Panel(QWidget): self.geotiff_dir_label = QLabel("水色指数目录:") self.geotiff_dir_label.setMinimumWidth(120) self.geotiff_dir_edit = QLineEdit() - self.geotiff_dir_edit.setPlaceholderText("选择 8_WaterIndex_Images 文件夹(批量渲染)…") + self.geotiff_dir_edit.setPlaceholderText("选择 10_WaterIndex_Images 文件夹(批量渲染)…") geotiff_dir_btn = QPushButton("浏览…") geotiff_dir_btn.setMaximumWidth(80) geotiff_dir_btn.clicked.connect(self.browse_geotiff_dir) @@ -392,7 +392,7 @@ class Step14Panel(QWidget): """浏览 GeoTIFF 文件夹(批量模式)""" default = self._get_default_work_dir() if default: - default = os.path.join(default, "8_WaterIndex_Images") + default = os.path.join(default, "10_WaterIndex_Images") d = QFileDialog.getExistingDirectory( self, "选择水色指数 GeoTIFF 文件夹", default ) @@ -514,18 +514,18 @@ class Step14Panel(QWidget): pred_dir = None if hasattr(main_window, 'step11_prediction_panel'): step8_widget = getattr(main_window.step11_prediction_panel, 'output_file', None) - step8_output = "" + step10_output = "" if hasattr(step8_widget, 'get_path'): - step8_output = step8_widget.get_path() or "" + step10_output = step8_widget.get_path() or "" elif hasattr(step8_widget, 'text'): - step8_output = step8_widget.text() or "" + step10_output = step8_widget.text() or "" - if step8_output: + if step10_output: # 若为相对路径,使用 work_dir 合成为绝对路径 - if not os.path.isabs(step8_output): - step8_output = os.path.join(self.work_dir or '', step8_output).replace('\\', '/') + if not os.path.isabs(step10_output): + step10_output = os.path.join(self.work_dir or '', step10_output).replace('\\', '/') # 提取父目录后追加 Machine_Learning_Prediction(最底层真实子目录) - base_pred_dir = str(Path(step8_output).parent) + base_pred_dir = str(Path(step10_output).parent) ml_pred_dir = Path(base_pred_dir) / "Machine_Learning_Prediction" pred_dir = str(ml_pred_dir) if ml_pred_dir.exists() else base_pred_dir @@ -594,13 +594,13 @@ class Step14Panel(QWidget): print("⚠️ 提示:专题图生成模块需传入标准矢量边界文件 (.shp),请手动选择。") # 6. 自动探测 Step 8 输出的水色指数 GeoTIFF(GeoTIFF 渲染模式) - step8_out_dir = Path(self.work_dir) / "8_WaterIndex_Images" if self.work_dir else None - if step8_out_dir and step8_out_dir.is_dir(): + step10_out_dir = Path(self.work_dir) / "10_WaterIndex_Images" if self.work_dir else None + if step10_out_dir and step10_out_dir.is_dir(): # GeoTIFF 批量模式:填充目录供批量渲染 if not (self.geotiff_dir_edit.text() or "").strip(): - self.geotiff_dir_edit.setText(str(step8_out_dir)) + self.geotiff_dir_edit.setText(str(step10_out_dir)) # GeoTIFF 单文件模式:默认选中第一个 - tif_files = sorted(step8_out_dir.glob("*.tif")) + tif_files = sorted(step10_out_dir.glob("*.tif")) if tif_files and not (self.geotiff_file.get_path() or "").strip(): self.geotiff_file.set_path(str(tif_files[0])) except Exception as e: diff --git a/src/gui/water_quality_gui.py b/src/gui/water_quality_gui.py index 3939276..24c1b0a 100644 --- a/src/gui/water_quality_gui.py +++ b/src/gui/water_quality_gui.py @@ -1378,7 +1378,8 @@ class WaterQualityGUI(QMainWindow): 'step11_map': "14_visualization/" } - # 定义步骤间的依赖关系:{当前步骤: {输入字段: (依赖步骤, 输出类型, 面板属性名)}} + # 依赖关系字典结构: + # '当前步骤ID': { '依赖参数名': ('上游步骤ID', '上游输出类型/Key', '当前步骤接收该路径的组件属性名') } self.step_dependencies = { 'step2': { 'img_path': ('step1', 'reference_img', 'img_file'), @@ -1386,31 +1387,36 @@ class WaterQualityGUI(QMainWindow): }, 'step3': { 'img_path': ('step1', 'reference_img', 'img_file'), - 'water_mask': ('step1', 'water_mask', 'water_mask_file'), + 'water_mask': ('step1', 'water_mask', 'water_mask_file') + }, + 'step4_sampling': { + 'deglint_img_path': ('step3', 'deglint_image', 'deglint_img_file'), + 'water_mask_path': ('step1', 'water_mask', 'water_mask_file') + }, + 'step5_clean': { + 'csv_path': ('step4_sampling', 'sampling_spectra', 'csv_file') # step5 寻找 step4 的采样点 }, 'step6_feature': { 'deglint_img_path': ('step3', 'deglint_image', 'deglint_img_file'), 'csv_path': ('step5_clean', 'processed_data', 'csv_file'), - 'boundary_mask_path': ('step1', 'water_mask', 'boundary_mask_file'), + 'boundary_mask_path': ('step1', 'water_mask', 'water_mask_file'), # step6_panel里叫water_mask_file 'glint_mask_path': ('step2', 'glint_mask', 'glint_mask_file') }, 'step7_index': { - 'training_csv_path': ('step6_feature', 'training_spectra', 'output_file') + 'training_csv_path': ('step6_feature', 'training_spectra', 'training_data_widget') # step7 找 step6 的光谱提取 }, 'step8_ml_train': { - 'training_csv_path': ('step7_index', 'water_indices', 'csv_file') - }, - 'step4_sampling': { - 'deglint_img_path': ('step3', 'deglint_image', 'deglint_img_file'), - 'water_mask_path': ('step1', 'water_mask', 'water_mask_file'), - 'glint_mask_path': ('step2', 'glint_mask', 'glint_mask_file') + 'training_csv_file': ('step7_index', 'training_spectra_indices', 'training_csv_file') # step8 找 step7 的指数宽表 }, 'step9_ml_predict': { - 'sampling_csv_path': ('step4_sampling', 'sampling_points', 'sampling_csv_file'), - 'models_dir': ('step8_ml_train', 'models', 'models_dir_file') + 'models_dir': ('step8_ml_train', 'Supervised_Model_Training', 'models_dir_widget') + }, + 'step10_watercolor': { + 'bsq_file': ('step3', 'deglint_image', 'bsq_file') # 水色反演需要去耀斑BSQ影像 }, 'step11_map': { - 'prediction_csv_path': ('step9_ml_predict', 'predictions', 'prediction_csv_file') + 'prediction_csv_dir_edit': ('step9_ml_predict', 'Machine_Learning_Prediction', 'prediction_csv_dir_edit'), + 'geotiff_dir_edit': ('step10_watercolor', 'WaterIndex_Images', 'geotiff_dir_edit') } } diff --git a/src/preprocessing/process_water_quality_data.py b/src/preprocessing/process_water_quality_data.py index 4f44cf7..1decaf6 100644 --- a/src/preprocessing/process_water_quality_data.py +++ b/src/preprocessing/process_water_quality_data.py @@ -4,6 +4,7 @@ if not hasattr(threading.Thread, "isAlive"): import warnings import os +import re import numpy as np import pandas as pd from scipy import stats @@ -11,6 +12,54 @@ from scipy import stats warnings.filterwarnings("ignore") +def auto_detect_coord_columns(df: pd.DataFrame): + """ + 双重验证智能识别坐标列: + 1. 严格正则匹配列名 + 2. 基于数值范围的地理学推断 + """ + lon_patterns = [ + r'^lon', r'^lng', r'^longitude', r'经度', r'^x$', r'^utm_x$', r'^pixel_x$' + ] + lat_patterns = [ + r'^lat', r'^latitude', r'纬度', r'^y$', r'^utm_y$', r'^pixel_y$' + ] + + x_col, y_col = None, None + + for col in df.columns: + col_str = str(col).lower().strip() + if x_col is None and any(re.search(p, col_str) for p in lon_patterns): + x_col = col + if y_col is None and any(re.search(p, col_str) for p in lat_patterns): + y_col = col + + if x_col and y_col: + return x_col, y_col + + numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist() + + if len(numeric_cols) >= 2: + col1, col2 = numeric_cols[0], numeric_cols[1] + mean1 = df[col1].head(10).mean() + mean2 = df[col2].head(10).mean() + + if abs(mean1) <= 90 and abs(mean2) > 90: + y_col, x_col = col1, col2 + elif abs(mean2) <= 90 and abs(mean1) > 90: + x_col, y_col = col1, col2 + else: + if mean1 > mean2: + x_col, y_col = col1, col2 + else: + x_col, y_col = col2, col1 + + print(f"💡 触发智能数值推断坐标列: X/经度->[{x_col}], Y/纬度->[{y_col}]") + return x_col, y_col + + return df.columns[0], df.columns[1] + + def detect_outliers_iqr(data: pd.DataFrame, column: str) -> pd.Series: """使用 IQR 方法检测异常值,返回与 data 同索引的布尔序列""" s = pd.to_numeric(data[column], errors="coerce") @@ -92,11 +141,15 @@ def process_water_quality_data(input_file: str, output_file: str): print(f"原始数据形状: {df.shape}") print(f"列名: {list(df.columns)}") + # 0.5) 智能检测坐标列 + x_col, y_col = auto_detect_coord_columns(df) + print(f"坐标列检测结果: X/经度=[{x_col}], Y/纬度=[{y_col}]") + # 1) 经纬度精度筛选(小数位 >= 7) print("\n正在筛选经纬度精度(小数位>=7)...") initial_count = len(df) - for col in ["经度", "纬度"]: + for col in [y_col, x_col]: if col in df.columns: dec_len = df[col].apply(_decimal_len) keep_mask = dec_len >= 7 @@ -109,26 +162,23 @@ def process_water_quality_data(input_file: str, output_file: str): # 2) 异常值检测(IQR)- 只删除异常值,不删除整行 print("\n正在检测异常值(IQR)...") - # 数值列 numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist() - # 排除不检测的列 - exclude_columns = ["时间", "测量点", "纬度", "经度"] + exclude_columns = {"时间", "测量点", y_col, x_col} if "原始" in df.columns: - exclude_columns.append("原始") + exclude_columns.add("原始") columns_to_check = [c for c in numeric_columns if c not in exclude_columns] print(f"将检测以下列的异常值: {columns_to_check}") df_clean = df.copy() total_outliers_removed = 0 - + for column in columns_to_check: if column in df_clean.columns and df_clean[column].notna().sum() > 0: col_mask = detect_outliers_iqr(df_clean, column) outlier_count = int(col_mask.sum()) print(f'列 "{column}" 检测到 {outlier_count} 个异常值,将其设为 NaN') - # 只将异常值设为 NaN,不删除整行 df_clean.loc[col_mask, column] = np.nan total_outliers_removed += outlier_count @@ -140,7 +190,7 @@ def process_water_quality_data(input_file: str, output_file: str): df_clean = df_clean.drop(columns=["原始"]) print('已去除 "原始" 列') - # 4) 字段类型处理:尽量把“时间”转为 datetime + # 4) 字段类型处理:尽量把"时间"转为 datetime if "时间" in df_clean.columns: try: df_clean["时间"] = pd.to_datetime(df_clean["时间"], errors="coerce") @@ -153,22 +203,18 @@ def process_water_quality_data(input_file: str, output_file: str): print('错误:未找到 "测量点" 列') return - # 构建聚合字典 agg_dict = {} if "时间" in df_clean.columns and np.issubdtype(df_clean["时间"].dtype, np.datetime64): - # 时间取平均(等价于时间戳平均) agg_dict["时间"] = "mean" elif "时间" in df_clean.columns: - # 如果不是时间类型,保留最常见值以避免无意义的字符串平均 agg_dict["时间"] = lambda s: s.mode().iloc[0] if not s.mode().empty else s.dropna().iloc[0] if s.dropna().size else np.nan - for col in ["纬度", "经度"]: + for col in [y_col, x_col]: if col in df_clean.columns: agg_dict[col] = "mean" - # 其余数值列取均值 for col in df_clean.select_dtypes(include=[np.number]).columns: - if col not in ["纬度", "经度"]: + if col not in {y_col, x_col}: agg_dict[col] = "mean" grouped = df_clean.groupby("测量点", as_index=False).agg(agg_dict) diff --git a/tmp_watercolor_rescue.py b/tmp_watercolor_rescue.py index af382e6..35e8465 100644 --- a/tmp_watercolor_rescue.py +++ b/tmp_watercolor_rescue.py @@ -204,7 +204,7 @@ class Step11WaterColorPanel(QWidget): "输出目录:", "Directories" ) - self.output_dir.line_edit.setPlaceholderText("留空 → 工作目录/8_WaterIndex_Images") + self.output_dir.line_edit.setPlaceholderText("留空 → 工作目录/10_WaterIndex_Images") self.output_dir.browse_btn.clicked.disconnect() self.output_dir.browse_btn.clicked.connect(self._browse_output_dir) output_layout.addRow("输出目录:", self.output_dir) @@ -452,7 +452,7 @@ class Step11WaterColorPanel(QWidget): # 自动填入输出目录 if self.work_dir: - out_dir = os.path.join(self.work_dir, "8_WaterIndex_Images").replace('\\', '/') + out_dir = os.path.join(self.work_dir, "10_WaterIndex_Images").replace('\\', '/') os.makedirs(out_dir, exist_ok=True) if not self.output_dir.get_path(): self.output_dir.set_path(out_dir) @@ -483,7 +483,7 @@ class Step11WaterColorPanel(QWidget): return if not output_dir: work_dir = self._get_default_work_dir() - output_dir = os.path.join(work_dir, "8_WaterIndex_Images").replace('\\', '/') + output_dir = os.path.join(work_dir, "10_WaterIndex_Images").replace('\\', '/') os.makedirs(output_dir, exist_ok=True) self.output_dir.set_path(output_dir)