添加公式方法

This commit is contained in:
DXC
2026-06-12 16:48:20 +08:00
parent be47b70594
commit f73a7d8999
7 changed files with 162 additions and 62 deletions

View File

@ -348,7 +348,7 @@ class WaterIndexProcessor:
hdr_path : str, optional hdr_path : str, optional
ENVI HDR 文件路径None → 自动构造) ENVI HDR 文件路径None → 自动构造)
output_dir : str, optional output_dir : str, optional
输出目录None → 与 bsq_path 同目录下的 8_WaterIndex_Images/ 输出目录None → 与 bsq_path 同目录下的 10_WaterIndex_Images/
formula_names : list, optional formula_names : list, optional
要处理的公式名列表None → 处理全部) 要处理的公式名列表None → 处理全部)
water_mask : np.ndarray, optional water_mask : np.ndarray, optional
@ -374,7 +374,7 @@ class WaterIndexProcessor:
# ── 自动构造输出目录 ──────────────────────────────────────────── # ── 自动构造输出目录 ────────────────────────────────────────────
if output_dir is None: if output_dir is None:
output_dir = os.path.join(os.path.dirname(bsq_path), '8_WaterIndex_Images') output_dir = os.path.join(os.path.dirname(bsq_path), '10_WaterIndex_Images')
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
def progress(msg: str, pct: float): def progress(msg: str, pct: float):
@ -610,7 +610,7 @@ class WaterIndexProcessor:
if os.path.isfile(hdr_path_alt): if os.path.isfile(hdr_path_alt):
hdr_path = hdr_path_alt hdr_path = hdr_path_alt
output_dir = os.path.join(work_dir, "8_WaterIndex_Images") output_dir = os.path.join(work_dir, "10_WaterIndex_Images")
# ── 加载水域掩膜(可选)─────────────────────────────────────── # ── 加载水域掩膜(可选)───────────────────────────────────────
water_mask: Optional[np.ndarray] = None water_mask: Optional[np.ndarray] = None

View File

@ -2,6 +2,7 @@ from osgeo import gdal, osr
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import os import os
import re
import spectral import spectral
from math import sin, cos, tan, sqrt, radians from math import sin, cos, tan, sqrt, radians
@ -473,9 +474,56 @@ def get_spectral_in_coor(imgpath, coorpath, outpath, radius=0, flare_path=None,
for i in range(min(3, coor_data.shape[0])): for i in range(min(3, coor_data.shape[0])):
print(f"{i + 1}: {coor_data[i, :min(5, coor_data.shape[1])]}") # 只显示前5列 print(f"{i + 1}: {coor_data[i, :min(5, coor_data.shape[1])]}") # 只显示前5列
# 提取原始坐标 # 提取原始坐标(使用智能坐标列检测)
lat_array = coor_data[:, 1] # 第2列是纬度跳过测量点ID列 lon_patterns = [
lon_array = coor_data[:, 2] # 第3列是经度 r'^lon', r'^lng', r'^longitude', r'经度', r'^x$', r'^utm_x$', r'^pixel_x$'
]
lat_patterns = [
r'^lat', r'^latitude', r'纬度', r'^y$', r'^utm_y$', r'^pixel_y$'
]
x_col_name, y_col_name = None, None
if coor_df is not None and hasattr(coor_df, 'columns'):
for col in coor_df.columns:
col_str = str(col).lower().strip()
if x_col_name is None and any(re.search(p, col_str) for p in lon_patterns):
x_col_name = col
if y_col_name is None and any(re.search(p, col_str) for p in lat_patterns):
y_col_name = col
if x_col_name and y_col_name and x_col_name in coor_df.columns and y_col_name in coor_df.columns:
lon_array = coor_df[x_col_name].values
lat_array = coor_df[y_col_name].values
print(f"💡 坐标列名检测: X/经度=[{x_col_name}], Y/纬度=[{y_col_name}]")
else:
numeric_cols = coor_df.select_dtypes(include=[np.number]).columns.tolist() if coor_df is not None else []
if len(numeric_cols) >= 2:
col1, col2 = numeric_cols[0], numeric_cols[1]
mean1 = coor_df[col1].head(10).mean()
mean2 = coor_df[col2].head(10).mean()
if abs(mean1) <= 90 and abs(mean2) > 90:
y_col_name, x_col_name = col1, col2
lon_array = coor_df[x_col_name].values
lat_array = coor_df[y_col_name].values
elif abs(mean2) <= 90 and abs(mean1) > 90:
x_col_name, y_col_name = col1, col2
lon_array = coor_df[x_col_name].values
lat_array = coor_df[y_col_name].values
else:
if mean1 > mean2:
x_col_name, y_col_name = col1, col2
else:
x_col_name, y_col_name = col2, col1
lon_array = coor_df[x_col_name].values
lat_array = coor_df[y_col_name].values
print(f"💡 触发智能数值推断坐标列: X/经度=[{x_col_name}], Y/纬度=[{y_col_name}]")
else:
if coor_data is not None and coor_data.shape[1] >= 3:
lat_array = coor_data[:, 1]
lon_array = coor_data[:, 2]
else:
raise Exception("坐标文件格式错误需要至少2列数据且最好包含坐标列名如lon/lat/经度/纬度)")
print(f"\n=== 原始坐标信息 ===") print(f"\n=== 原始坐标信息 ===")
print(f"原始坐标范围: 经度 {np.min(lon_array):.6f} ~ {np.max(lon_array):.6f}, 纬度 {np.min(lat_array):.6f} ~ {np.max(lat_array):.6f}") print(f"原始坐标范围: 经度 {np.min(lon_array):.6f} ~ {np.max(lon_array):.6f}, 纬度 {np.min(lat_array):.6f} ~ {np.max(lat_array):.6f}")

View File

@ -253,7 +253,7 @@ class Step11MapPanel(QWidget):
self.geotiff_dir_label = QLabel("水色指数目录:") self.geotiff_dir_label = QLabel("水色指数目录:")
self.geotiff_dir_label.setMinimumWidth(120) self.geotiff_dir_label.setMinimumWidth(120)
self.geotiff_dir_edit = QLineEdit() self.geotiff_dir_edit = QLineEdit()
self.geotiff_dir_edit.setPlaceholderText("选择 8_WaterIndex_Images 文件夹(批量渲染)…") self.geotiff_dir_edit.setPlaceholderText("选择 10_WaterIndex_Images 文件夹(批量渲染)…")
geotiff_dir_btn = QPushButton("浏览…") geotiff_dir_btn = QPushButton("浏览…")
geotiff_dir_btn.setMaximumWidth(80) geotiff_dir_btn.setMaximumWidth(80)
geotiff_dir_btn.clicked.connect(self.browse_geotiff_dir) geotiff_dir_btn.clicked.connect(self.browse_geotiff_dir)
@ -392,7 +392,7 @@ class Step11MapPanel(QWidget):
"""浏览 GeoTIFF 文件夹(批量模式)""" """浏览 GeoTIFF 文件夹(批量模式)"""
default = self._get_default_work_dir() default = self._get_default_work_dir()
if default: if default:
default = os.path.join(default, "8_WaterIndex_Images") default = os.path.join(default, "10_WaterIndex_Images")
d = QFileDialog.getExistingDirectory( d = QFileDialog.getExistingDirectory(
self, "选择水色指数 GeoTIFF 文件夹", default self, "选择水色指数 GeoTIFF 文件夹", default
) )
@ -514,18 +514,18 @@ class Step11MapPanel(QWidget):
pred_dir = None pred_dir = None
if hasattr(main_window, 'step11_prediction_panel'): if hasattr(main_window, 'step11_prediction_panel'):
step8_widget = getattr(main_window.step11_prediction_panel, 'output_file', None) step8_widget = getattr(main_window.step11_prediction_panel, 'output_file', None)
step8_output = "" step10_output = ""
if hasattr(step8_widget, 'get_path'): if hasattr(step8_widget, 'get_path'):
step8_output = step8_widget.get_path() or "" step10_output = step8_widget.get_path() or ""
elif hasattr(step8_widget, 'text'): elif hasattr(step8_widget, 'text'):
step8_output = step8_widget.text() or "" step10_output = step8_widget.text() or ""
if step8_output: if step10_output:
# 若为相对路径,使用 work_dir 合成为绝对路径 # 若为相对路径,使用 work_dir 合成为绝对路径
if not os.path.isabs(step8_output): if not os.path.isabs(step10_output):
step8_output = os.path.join(self.work_dir or '', step8_output).replace('\\', '/') step10_output = os.path.join(self.work_dir or '', step10_output).replace('\\', '/')
# 提取父目录后追加 Machine_Learning_Prediction最底层真实子目录 # 提取父目录后追加 Machine_Learning_Prediction最底层真实子目录
base_pred_dir = str(Path(step8_output).parent) base_pred_dir = str(Path(step10_output).parent)
ml_pred_dir = Path(base_pred_dir) / "Machine_Learning_Prediction" ml_pred_dir = Path(base_pred_dir) / "Machine_Learning_Prediction"
pred_dir = str(ml_pred_dir) if ml_pred_dir.exists() else base_pred_dir pred_dir = str(ml_pred_dir) if ml_pred_dir.exists() else base_pred_dir
@ -594,13 +594,13 @@ class Step11MapPanel(QWidget):
print("⚠️ 提示:专题图生成模块需传入标准矢量边界文件 (.shp),请手动选择。") print("⚠️ 提示:专题图生成模块需传入标准矢量边界文件 (.shp),请手动选择。")
# 6. 自动探测 Step 8 输出的水色指数 GeoTIFFGeoTIFF 渲染模式) # 6. 自动探测 Step 8 输出的水色指数 GeoTIFFGeoTIFF 渲染模式)
step8_out_dir = Path(self.work_dir) / "8_WaterIndex_Images" if self.work_dir else None step10_out_dir = Path(self.work_dir) / "10_WaterIndex_Images" if self.work_dir else None
if step8_out_dir and step8_out_dir.is_dir(): if step10_out_dir and step10_out_dir.is_dir():
# GeoTIFF 批量模式:填充目录供批量渲染 # GeoTIFF 批量模式:填充目录供批量渲染
if not (self.geotiff_dir_edit.text() or "").strip(): if not (self.geotiff_dir_edit.text() or "").strip():
self.geotiff_dir_edit.setText(str(step8_out_dir)) self.geotiff_dir_edit.setText(str(step10_out_dir))
# GeoTIFF 单文件模式:默认选中第一个 # GeoTIFF 单文件模式:默认选中第一个
tif_files = sorted(step8_out_dir.glob("*.tif")) tif_files = sorted(step10_out_dir.glob("*.tif"))
if tif_files and not (self.geotiff_file.get_path() or "").strip(): if tif_files and not (self.geotiff_file.get_path() or "").strip():
self.geotiff_file.set_path(str(tif_files[0])) self.geotiff_file.set_path(str(tif_files[0]))
except Exception as e: except Exception as e:

View File

@ -253,7 +253,7 @@ class Step14Panel(QWidget):
self.geotiff_dir_label = QLabel("水色指数目录:") self.geotiff_dir_label = QLabel("水色指数目录:")
self.geotiff_dir_label.setMinimumWidth(120) self.geotiff_dir_label.setMinimumWidth(120)
self.geotiff_dir_edit = QLineEdit() self.geotiff_dir_edit = QLineEdit()
self.geotiff_dir_edit.setPlaceholderText("选择 8_WaterIndex_Images 文件夹(批量渲染)…") self.geotiff_dir_edit.setPlaceholderText("选择 10_WaterIndex_Images 文件夹(批量渲染)…")
geotiff_dir_btn = QPushButton("浏览…") geotiff_dir_btn = QPushButton("浏览…")
geotiff_dir_btn.setMaximumWidth(80) geotiff_dir_btn.setMaximumWidth(80)
geotiff_dir_btn.clicked.connect(self.browse_geotiff_dir) geotiff_dir_btn.clicked.connect(self.browse_geotiff_dir)
@ -392,7 +392,7 @@ class Step14Panel(QWidget):
"""浏览 GeoTIFF 文件夹(批量模式)""" """浏览 GeoTIFF 文件夹(批量模式)"""
default = self._get_default_work_dir() default = self._get_default_work_dir()
if default: if default:
default = os.path.join(default, "8_WaterIndex_Images") default = os.path.join(default, "10_WaterIndex_Images")
d = QFileDialog.getExistingDirectory( d = QFileDialog.getExistingDirectory(
self, "选择水色指数 GeoTIFF 文件夹", default self, "选择水色指数 GeoTIFF 文件夹", default
) )
@ -514,18 +514,18 @@ class Step14Panel(QWidget):
pred_dir = None pred_dir = None
if hasattr(main_window, 'step11_prediction_panel'): if hasattr(main_window, 'step11_prediction_panel'):
step8_widget = getattr(main_window.step11_prediction_panel, 'output_file', None) step8_widget = getattr(main_window.step11_prediction_panel, 'output_file', None)
step8_output = "" step10_output = ""
if hasattr(step8_widget, 'get_path'): if hasattr(step8_widget, 'get_path'):
step8_output = step8_widget.get_path() or "" step10_output = step8_widget.get_path() or ""
elif hasattr(step8_widget, 'text'): elif hasattr(step8_widget, 'text'):
step8_output = step8_widget.text() or "" step10_output = step8_widget.text() or ""
if step8_output: if step10_output:
# 若为相对路径,使用 work_dir 合成为绝对路径 # 若为相对路径,使用 work_dir 合成为绝对路径
if not os.path.isabs(step8_output): if not os.path.isabs(step10_output):
step8_output = os.path.join(self.work_dir or '', step8_output).replace('\\', '/') step10_output = os.path.join(self.work_dir or '', step10_output).replace('\\', '/')
# 提取父目录后追加 Machine_Learning_Prediction最底层真实子目录 # 提取父目录后追加 Machine_Learning_Prediction最底层真实子目录
base_pred_dir = str(Path(step8_output).parent) base_pred_dir = str(Path(step10_output).parent)
ml_pred_dir = Path(base_pred_dir) / "Machine_Learning_Prediction" ml_pred_dir = Path(base_pred_dir) / "Machine_Learning_Prediction"
pred_dir = str(ml_pred_dir) if ml_pred_dir.exists() else base_pred_dir pred_dir = str(ml_pred_dir) if ml_pred_dir.exists() else base_pred_dir
@ -594,13 +594,13 @@ class Step14Panel(QWidget):
print("⚠️ 提示:专题图生成模块需传入标准矢量边界文件 (.shp),请手动选择。") print("⚠️ 提示:专题图生成模块需传入标准矢量边界文件 (.shp),请手动选择。")
# 6. 自动探测 Step 8 输出的水色指数 GeoTIFFGeoTIFF 渲染模式) # 6. 自动探测 Step 8 输出的水色指数 GeoTIFFGeoTIFF 渲染模式)
step8_out_dir = Path(self.work_dir) / "8_WaterIndex_Images" if self.work_dir else None step10_out_dir = Path(self.work_dir) / "10_WaterIndex_Images" if self.work_dir else None
if step8_out_dir and step8_out_dir.is_dir(): if step10_out_dir and step10_out_dir.is_dir():
# GeoTIFF 批量模式:填充目录供批量渲染 # GeoTIFF 批量模式:填充目录供批量渲染
if not (self.geotiff_dir_edit.text() or "").strip(): if not (self.geotiff_dir_edit.text() or "").strip():
self.geotiff_dir_edit.setText(str(step8_out_dir)) self.geotiff_dir_edit.setText(str(step10_out_dir))
# GeoTIFF 单文件模式:默认选中第一个 # GeoTIFF 单文件模式:默认选中第一个
tif_files = sorted(step8_out_dir.glob("*.tif")) tif_files = sorted(step10_out_dir.glob("*.tif"))
if tif_files and not (self.geotiff_file.get_path() or "").strip(): if tif_files and not (self.geotiff_file.get_path() or "").strip():
self.geotiff_file.set_path(str(tif_files[0])) self.geotiff_file.set_path(str(tif_files[0]))
except Exception as e: except Exception as e:

View File

@ -1378,7 +1378,8 @@ class WaterQualityGUI(QMainWindow):
'step11_map': "14_visualization/" 'step11_map': "14_visualization/"
} }
# 定义步骤间的依赖关系:{当前步骤: {输入字段: (依赖步骤, 输出类型, 面板属性名)}} # 依赖关系字典结构:
# '当前步骤ID': { '依赖参数名': ('上游步骤ID', '上游输出类型/Key', '当前步骤接收该路径的组件属性名') }
self.step_dependencies = { self.step_dependencies = {
'step2': { 'step2': {
'img_path': ('step1', 'reference_img', 'img_file'), 'img_path': ('step1', 'reference_img', 'img_file'),
@ -1386,31 +1387,36 @@ class WaterQualityGUI(QMainWindow):
}, },
'step3': { 'step3': {
'img_path': ('step1', 'reference_img', 'img_file'), 'img_path': ('step1', 'reference_img', 'img_file'),
'water_mask': ('step1', 'water_mask', 'water_mask_file'), 'water_mask': ('step1', 'water_mask', 'water_mask_file')
},
'step4_sampling': {
'deglint_img_path': ('step3', 'deglint_image', 'deglint_img_file'),
'water_mask_path': ('step1', 'water_mask', 'water_mask_file')
},
'step5_clean': {
'csv_path': ('step4_sampling', 'sampling_spectra', 'csv_file') # step5 寻找 step4 的采样点
}, },
'step6_feature': { 'step6_feature': {
'deglint_img_path': ('step3', 'deglint_image', 'deglint_img_file'), 'deglint_img_path': ('step3', 'deglint_image', 'deglint_img_file'),
'csv_path': ('step5_clean', 'processed_data', 'csv_file'), 'csv_path': ('step5_clean', 'processed_data', 'csv_file'),
'boundary_mask_path': ('step1', 'water_mask', 'boundary_mask_file'), 'boundary_mask_path': ('step1', 'water_mask', 'water_mask_file'), # step6_panel里叫water_mask_file
'glint_mask_path': ('step2', 'glint_mask', 'glint_mask_file') 'glint_mask_path': ('step2', 'glint_mask', 'glint_mask_file')
}, },
'step7_index': { 'step7_index': {
'training_csv_path': ('step6_feature', 'training_spectra', 'output_file') 'training_csv_path': ('step6_feature', 'training_spectra', 'training_data_widget') # step7 找 step6 的光谱提取
}, },
'step8_ml_train': { 'step8_ml_train': {
'training_csv_path': ('step7_index', 'water_indices', 'csv_file') 'training_csv_file': ('step7_index', 'training_spectra_indices', 'training_csv_file') # step8 找 step7 的指数宽表
},
'step4_sampling': {
'deglint_img_path': ('step3', 'deglint_image', 'deglint_img_file'),
'water_mask_path': ('step1', 'water_mask', 'water_mask_file'),
'glint_mask_path': ('step2', 'glint_mask', 'glint_mask_file')
}, },
'step9_ml_predict': { 'step9_ml_predict': {
'sampling_csv_path': ('step4_sampling', 'sampling_points', 'sampling_csv_file'), 'models_dir': ('step8_ml_train', 'Supervised_Model_Training', 'models_dir_widget')
'models_dir': ('step8_ml_train', 'models', 'models_dir_file') },
'step10_watercolor': {
'bsq_file': ('step3', 'deglint_image', 'bsq_file') # 水色反演需要去耀斑BSQ影像
}, },
'step11_map': { 'step11_map': {
'prediction_csv_path': ('step9_ml_predict', 'predictions', 'prediction_csv_file') 'prediction_csv_dir_edit': ('step9_ml_predict', 'Machine_Learning_Prediction', 'prediction_csv_dir_edit'),
'geotiff_dir_edit': ('step10_watercolor', 'WaterIndex_Images', 'geotiff_dir_edit')
} }
} }

View File

@ -4,6 +4,7 @@ if not hasattr(threading.Thread, "isAlive"):
import warnings import warnings
import os import os
import re
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from scipy import stats from scipy import stats
@ -11,6 +12,54 @@ from scipy import stats
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
def auto_detect_coord_columns(df: pd.DataFrame) -> tuple:
    """Guess which columns of *df* hold the X/longitude and Y/latitude values.

    Detection runs in three stages and stops at the first one that succeeds:

    1. Column-name matching: each column label is lower-cased and stripped,
       then tested against the longitude / latitude regex patterns. The
       first matching column wins for each axis.
    2. Numeric-range inference on the first two numeric columns, using the
       mean of their first 10 rows: a column whose absolute mean is <= 90
       while the other's is > 90 is taken as latitude. When that test is
       inconclusive, the column with the larger mean is taken as X.
    3. Positional fallback: the first two columns of the frame.

    Parameters
    ----------
    df : pd.DataFrame
        Table that is expected to contain coordinate columns.

    Returns
    -------
    tuple
        ``(x_col, y_col)`` column labels. ``(None, None)`` is returned when
        the frame has fewer than two columns, so callers that guard with
        ``col in df.columns`` simply skip the coordinate handling instead
        of hitting an ``IndexError``.
    """
    lon_patterns = (
        r'^lon', r'^lng', r'^longitude', r'经度', r'^x$', r'^utm_x$', r'^pixel_x$'
    )
    lat_patterns = (
        r'^lat', r'^latitude', r'纬度', r'^y$', r'^utm_y$', r'^pixel_y$'
    )
    # Compile each family once; an alternation is equivalent to trying the
    # individual patterns one after another with re.search.
    lon_re = re.compile('|'.join(lon_patterns))
    lat_re = re.compile('|'.join(lat_patterns))

    # Stage 1: match by column name (first hit per axis is kept).
    x_col, y_col = None, None
    for col in df.columns:
        col_str = str(col).lower().strip()
        if x_col is None and lon_re.search(col_str):
            x_col = col
        if y_col is None and lat_re.search(col_str):
            y_col = col

    # Compare against None explicitly so that a falsy label can never be
    # mistaken for "not found".
    if x_col is not None and y_col is not None:
        return x_col, y_col

    # Stage 2: infer from value ranges of the first two numeric columns.
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    if len(numeric_cols) >= 2:
        col1, col2 = numeric_cols[0], numeric_cols[1]
        mean1 = df[col1].head(10).mean()
        mean2 = df[col2].head(10).mean()

        if abs(mean1) <= 90 and abs(mean2) > 90:
            # Only col1 fits inside the latitude range.
            y_col, x_col = col1, col2
        elif abs(mean2) <= 90 and abs(mean1) > 90:
            # Only col2 fits inside the latitude range.
            x_col, y_col = col1, col2
        elif mean1 > mean2:
            # Range test inconclusive: larger mean is treated as X.
            x_col, y_col = col1, col2
        else:
            x_col, y_col = col2, col1

        print(f"💡 触发智能数值推断坐标列: X/经度->[{x_col}], Y/纬度->[{y_col}]")
        return x_col, y_col

    # Stage 3: positional fallback. Guard against frames that are too
    # narrow to provide two columns at all.
    if len(df.columns) < 2:
        return None, None
    return df.columns[0], df.columns[1]
def detect_outliers_iqr(data: pd.DataFrame, column: str) -> pd.Series: def detect_outliers_iqr(data: pd.DataFrame, column: str) -> pd.Series:
"""使用 IQR 方法检测异常值,返回与 data 同索引的布尔序列""" """使用 IQR 方法检测异常值,返回与 data 同索引的布尔序列"""
s = pd.to_numeric(data[column], errors="coerce") s = pd.to_numeric(data[column], errors="coerce")
@ -92,11 +141,15 @@ def process_water_quality_data(input_file: str, output_file: str):
print(f"原始数据形状: {df.shape}") print(f"原始数据形状: {df.shape}")
print(f"列名: {list(df.columns)}") print(f"列名: {list(df.columns)}")
# 0.5) 智能检测坐标列
x_col, y_col = auto_detect_coord_columns(df)
print(f"坐标列检测结果: X/经度=[{x_col}], Y/纬度=[{y_col}]")
# 1) 经纬度精度筛选(小数位 >= 7 # 1) 经纬度精度筛选(小数位 >= 7
print("\n正在筛选经纬度精度(小数位>=7)...") print("\n正在筛选经纬度精度(小数位>=7)...")
initial_count = len(df) initial_count = len(df)
for col in ["经度", "纬度"]: for col in [y_col, x_col]:
if col in df.columns: if col in df.columns:
dec_len = df[col].apply(_decimal_len) dec_len = df[col].apply(_decimal_len)
keep_mask = dec_len >= 7 keep_mask = dec_len >= 7
@ -109,13 +162,11 @@ def process_water_quality_data(input_file: str, output_file: str):
# 2) 异常值检测IQR- 只删除异常值,不删除整行 # 2) 异常值检测IQR- 只删除异常值,不删除整行
print("\n正在检测异常值(IQR)...") print("\n正在检测异常值(IQR)...")
# 数值列
numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist() numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
# 排除不检测的列 exclude_columns = {"时间", "测量点", y_col, x_col}
exclude_columns = ["时间", "测量点", "纬度", "经度"]
if "原始" in df.columns: if "原始" in df.columns:
exclude_columns.append("原始") exclude_columns.add("原始")
columns_to_check = [c for c in numeric_columns if c not in exclude_columns] columns_to_check = [c for c in numeric_columns if c not in exclude_columns]
print(f"将检测以下列的异常值: {columns_to_check}") print(f"将检测以下列的异常值: {columns_to_check}")
@ -128,7 +179,6 @@ def process_water_quality_data(input_file: str, output_file: str):
col_mask = detect_outliers_iqr(df_clean, column) col_mask = detect_outliers_iqr(df_clean, column)
outlier_count = int(col_mask.sum()) outlier_count = int(col_mask.sum())
print(f'"{column}" 检测到 {outlier_count} 个异常值,将其设为 NaN') print(f'"{column}" 检测到 {outlier_count} 个异常值,将其设为 NaN')
# 只将异常值设为 NaN不删除整行
df_clean.loc[col_mask, column] = np.nan df_clean.loc[col_mask, column] = np.nan
total_outliers_removed += outlier_count total_outliers_removed += outlier_count
@ -140,7 +190,7 @@ def process_water_quality_data(input_file: str, output_file: str):
df_clean = df_clean.drop(columns=["原始"]) df_clean = df_clean.drop(columns=["原始"])
print('已去除 "原始"') print('已去除 "原始"')
# 4) 字段类型处理:尽量把时间转为 datetime # 4) 字段类型处理:尽量把"时间"转为 datetime
if "时间" in df_clean.columns: if "时间" in df_clean.columns:
try: try:
df_clean["时间"] = pd.to_datetime(df_clean["时间"], errors="coerce") df_clean["时间"] = pd.to_datetime(df_clean["时间"], errors="coerce")
@ -153,22 +203,18 @@ def process_water_quality_data(input_file: str, output_file: str):
print('错误:未找到 "测量点"') print('错误:未找到 "测量点"')
return return
# 构建聚合字典
agg_dict = {} agg_dict = {}
if "时间" in df_clean.columns and np.issubdtype(df_clean["时间"].dtype, np.datetime64): if "时间" in df_clean.columns and np.issubdtype(df_clean["时间"].dtype, np.datetime64):
# 时间取平均(等价于时间戳平均)
agg_dict["时间"] = "mean" agg_dict["时间"] = "mean"
elif "时间" in df_clean.columns: elif "时间" in df_clean.columns:
# 如果不是时间类型,保留最常见值以避免无意义的字符串平均
agg_dict["时间"] = lambda s: s.mode().iloc[0] if not s.mode().empty else s.dropna().iloc[0] if s.dropna().size else np.nan agg_dict["时间"] = lambda s: s.mode().iloc[0] if not s.mode().empty else s.dropna().iloc[0] if s.dropna().size else np.nan
for col in ["纬度", "经度"]: for col in [y_col, x_col]:
if col in df_clean.columns: if col in df_clean.columns:
agg_dict[col] = "mean" agg_dict[col] = "mean"
# 其余数值列取均值
for col in df_clean.select_dtypes(include=[np.number]).columns: for col in df_clean.select_dtypes(include=[np.number]).columns:
if col not in ["纬度", "经度"]: if col not in {y_col, x_col}:
agg_dict[col] = "mean" agg_dict[col] = "mean"
grouped = df_clean.groupby("测量点", as_index=False).agg(agg_dict) grouped = df_clean.groupby("测量点", as_index=False).agg(agg_dict)

View File

@ -204,7 +204,7 @@ class Step11WaterColorPanel(QWidget):
"输出目录:", "输出目录:",
"Directories" "Directories"
) )
self.output_dir.line_edit.setPlaceholderText("留空 → 工作目录/8_WaterIndex_Images") self.output_dir.line_edit.setPlaceholderText("留空 → 工作目录/10_WaterIndex_Images")
self.output_dir.browse_btn.clicked.disconnect() self.output_dir.browse_btn.clicked.disconnect()
self.output_dir.browse_btn.clicked.connect(self._browse_output_dir) self.output_dir.browse_btn.clicked.connect(self._browse_output_dir)
output_layout.addRow("输出目录:", self.output_dir) output_layout.addRow("输出目录:", self.output_dir)
@ -452,7 +452,7 @@ class Step11WaterColorPanel(QWidget):
# 自动填入输出目录 # 自动填入输出目录
if self.work_dir: if self.work_dir:
out_dir = os.path.join(self.work_dir, "8_WaterIndex_Images").replace('\\', '/') out_dir = os.path.join(self.work_dir, "10_WaterIndex_Images").replace('\\', '/')
os.makedirs(out_dir, exist_ok=True) os.makedirs(out_dir, exist_ok=True)
if not self.output_dir.get_path(): if not self.output_dir.get_path():
self.output_dir.set_path(out_dir) self.output_dir.set_path(out_dir)
@ -483,7 +483,7 @@ class Step11WaterColorPanel(QWidget):
return return
if not output_dir: if not output_dir:
work_dir = self._get_default_work_dir() work_dir = self._get_default_work_dir()
output_dir = os.path.join(work_dir, "8_WaterIndex_Images").replace('\\', '/') output_dir = os.path.join(work_dir, "10_WaterIndex_Images").replace('\\', '/')
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
self.output_dir.set_path(output_dir) self.output_dir.set_path(output_dir)