添加公式方法
This commit is contained in:
@ -348,7 +348,7 @@ class WaterIndexProcessor:
|
|||||||
hdr_path : str, optional
|
hdr_path : str, optional
|
||||||
ENVI HDR 文件路径(None → 自动构造)
|
ENVI HDR 文件路径(None → 自动构造)
|
||||||
output_dir : str, optional
|
output_dir : str, optional
|
||||||
输出目录(None → 与 bsq_path 同目录下的 8_WaterIndex_Images/)
|
输出目录(None → 与 bsq_path 同目录下的 10_WaterIndex_Images/)
|
||||||
formula_names : list, optional
|
formula_names : list, optional
|
||||||
要处理的公式名列表(None → 处理全部)
|
要处理的公式名列表(None → 处理全部)
|
||||||
water_mask : np.ndarray, optional
|
water_mask : np.ndarray, optional
|
||||||
@ -374,7 +374,7 @@ class WaterIndexProcessor:
|
|||||||
|
|
||||||
# ── 自动构造输出目录 ────────────────────────────────────────────
|
# ── 自动构造输出目录 ────────────────────────────────────────────
|
||||||
if output_dir is None:
|
if output_dir is None:
|
||||||
output_dir = os.path.join(os.path.dirname(bsq_path), '8_WaterIndex_Images')
|
output_dir = os.path.join(os.path.dirname(bsq_path), '10_WaterIndex_Images')
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
|
||||||
def progress(msg: str, pct: float):
|
def progress(msg: str, pct: float):
|
||||||
@ -610,7 +610,7 @@ class WaterIndexProcessor:
|
|||||||
if os.path.isfile(hdr_path_alt):
|
if os.path.isfile(hdr_path_alt):
|
||||||
hdr_path = hdr_path_alt
|
hdr_path = hdr_path_alt
|
||||||
|
|
||||||
output_dir = os.path.join(work_dir, "8_WaterIndex_Images")
|
output_dir = os.path.join(work_dir, "10_WaterIndex_Images")
|
||||||
|
|
||||||
# ── 加载水域掩膜(可选)───────────────────────────────────────
|
# ── 加载水域掩膜(可选)───────────────────────────────────────
|
||||||
water_mask: Optional[np.ndarray] = None
|
water_mask: Optional[np.ndarray] = None
|
||||||
|
|||||||
@ -2,6 +2,7 @@ from osgeo import gdal, osr
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import spectral
|
import spectral
|
||||||
from math import sin, cos, tan, sqrt, radians
|
from math import sin, cos, tan, sqrt, radians
|
||||||
|
|
||||||
@ -473,9 +474,56 @@ def get_spectral_in_coor(imgpath, coorpath, outpath, radius=0, flare_path=None,
|
|||||||
for i in range(min(3, coor_data.shape[0])):
|
for i in range(min(3, coor_data.shape[0])):
|
||||||
print(f" 行{i + 1}: {coor_data[i, :min(5, coor_data.shape[1])]}") # 只显示前5列
|
print(f" 行{i + 1}: {coor_data[i, :min(5, coor_data.shape[1])]}") # 只显示前5列
|
||||||
|
|
||||||
# 提取原始坐标
|
# 提取原始坐标(使用智能坐标列检测)
|
||||||
lat_array = coor_data[:, 1] # 第2列是纬度(跳过测量点ID列)
|
lon_patterns = [
|
||||||
lon_array = coor_data[:, 2] # 第3列是经度
|
r'^lon', r'^lng', r'^longitude', r'经度', r'^x$', r'^utm_x$', r'^pixel_x$'
|
||||||
|
]
|
||||||
|
lat_patterns = [
|
||||||
|
r'^lat', r'^latitude', r'纬度', r'^y$', r'^utm_y$', r'^pixel_y$'
|
||||||
|
]
|
||||||
|
|
||||||
|
x_col_name, y_col_name = None, None
|
||||||
|
|
||||||
|
if coor_df is not None and hasattr(coor_df, 'columns'):
|
||||||
|
for col in coor_df.columns:
|
||||||
|
col_str = str(col).lower().strip()
|
||||||
|
if x_col_name is None and any(re.search(p, col_str) for p in lon_patterns):
|
||||||
|
x_col_name = col
|
||||||
|
if y_col_name is None and any(re.search(p, col_str) for p in lat_patterns):
|
||||||
|
y_col_name = col
|
||||||
|
|
||||||
|
if x_col_name and y_col_name and x_col_name in coor_df.columns and y_col_name in coor_df.columns:
|
||||||
|
lon_array = coor_df[x_col_name].values
|
||||||
|
lat_array = coor_df[y_col_name].values
|
||||||
|
print(f"💡 坐标列名检测: X/经度=[{x_col_name}], Y/纬度=[{y_col_name}]")
|
||||||
|
else:
|
||||||
|
numeric_cols = coor_df.select_dtypes(include=[np.number]).columns.tolist() if coor_df is not None else []
|
||||||
|
if len(numeric_cols) >= 2:
|
||||||
|
col1, col2 = numeric_cols[0], numeric_cols[1]
|
||||||
|
mean1 = coor_df[col1].head(10).mean()
|
||||||
|
mean2 = coor_df[col2].head(10).mean()
|
||||||
|
if abs(mean1) <= 90 and abs(mean2) > 90:
|
||||||
|
y_col_name, x_col_name = col1, col2
|
||||||
|
lon_array = coor_df[x_col_name].values
|
||||||
|
lat_array = coor_df[y_col_name].values
|
||||||
|
elif abs(mean2) <= 90 and abs(mean1) > 90:
|
||||||
|
x_col_name, y_col_name = col1, col2
|
||||||
|
lon_array = coor_df[x_col_name].values
|
||||||
|
lat_array = coor_df[y_col_name].values
|
||||||
|
else:
|
||||||
|
if mean1 > mean2:
|
||||||
|
x_col_name, y_col_name = col1, col2
|
||||||
|
else:
|
||||||
|
x_col_name, y_col_name = col2, col1
|
||||||
|
lon_array = coor_df[x_col_name].values
|
||||||
|
lat_array = coor_df[y_col_name].values
|
||||||
|
print(f"💡 触发智能数值推断坐标列: X/经度=[{x_col_name}], Y/纬度=[{y_col_name}]")
|
||||||
|
else:
|
||||||
|
if coor_data is not None and coor_data.shape[1] >= 3:
|
||||||
|
lat_array = coor_data[:, 1]
|
||||||
|
lon_array = coor_data[:, 2]
|
||||||
|
else:
|
||||||
|
raise Exception("坐标文件格式错误:需要至少2列数据,且最好包含坐标列名(如lon/lat/经度/纬度)")
|
||||||
|
|
||||||
print(f"\n=== 原始坐标信息 ===")
|
print(f"\n=== 原始坐标信息 ===")
|
||||||
print(f"原始坐标范围: 经度 {np.min(lon_array):.6f} ~ {np.max(lon_array):.6f}, 纬度 {np.min(lat_array):.6f} ~ {np.max(lat_array):.6f}")
|
print(f"原始坐标范围: 经度 {np.min(lon_array):.6f} ~ {np.max(lon_array):.6f}, 纬度 {np.min(lat_array):.6f} ~ {np.max(lat_array):.6f}")
|
||||||
|
|||||||
@ -253,7 +253,7 @@ class Step11MapPanel(QWidget):
|
|||||||
self.geotiff_dir_label = QLabel("水色指数目录:")
|
self.geotiff_dir_label = QLabel("水色指数目录:")
|
||||||
self.geotiff_dir_label.setMinimumWidth(120)
|
self.geotiff_dir_label.setMinimumWidth(120)
|
||||||
self.geotiff_dir_edit = QLineEdit()
|
self.geotiff_dir_edit = QLineEdit()
|
||||||
self.geotiff_dir_edit.setPlaceholderText("选择 8_WaterIndex_Images 文件夹(批量渲染)…")
|
self.geotiff_dir_edit.setPlaceholderText("选择 10_WaterIndex_Images 文件夹(批量渲染)…")
|
||||||
geotiff_dir_btn = QPushButton("浏览…")
|
geotiff_dir_btn = QPushButton("浏览…")
|
||||||
geotiff_dir_btn.setMaximumWidth(80)
|
geotiff_dir_btn.setMaximumWidth(80)
|
||||||
geotiff_dir_btn.clicked.connect(self.browse_geotiff_dir)
|
geotiff_dir_btn.clicked.connect(self.browse_geotiff_dir)
|
||||||
@ -392,7 +392,7 @@ class Step11MapPanel(QWidget):
|
|||||||
"""浏览 GeoTIFF 文件夹(批量模式)"""
|
"""浏览 GeoTIFF 文件夹(批量模式)"""
|
||||||
default = self._get_default_work_dir()
|
default = self._get_default_work_dir()
|
||||||
if default:
|
if default:
|
||||||
default = os.path.join(default, "8_WaterIndex_Images")
|
default = os.path.join(default, "10_WaterIndex_Images")
|
||||||
d = QFileDialog.getExistingDirectory(
|
d = QFileDialog.getExistingDirectory(
|
||||||
self, "选择水色指数 GeoTIFF 文件夹", default
|
self, "选择水色指数 GeoTIFF 文件夹", default
|
||||||
)
|
)
|
||||||
@ -514,18 +514,18 @@ class Step11MapPanel(QWidget):
|
|||||||
pred_dir = None
|
pred_dir = None
|
||||||
if hasattr(main_window, 'step11_prediction_panel'):
|
if hasattr(main_window, 'step11_prediction_panel'):
|
||||||
step8_widget = getattr(main_window.step11_prediction_panel, 'output_file', None)
|
step8_widget = getattr(main_window.step11_prediction_panel, 'output_file', None)
|
||||||
step8_output = ""
|
step10_output = ""
|
||||||
if hasattr(step8_widget, 'get_path'):
|
if hasattr(step8_widget, 'get_path'):
|
||||||
step8_output = step8_widget.get_path() or ""
|
step10_output = step8_widget.get_path() or ""
|
||||||
elif hasattr(step8_widget, 'text'):
|
elif hasattr(step8_widget, 'text'):
|
||||||
step8_output = step8_widget.text() or ""
|
step10_output = step8_widget.text() or ""
|
||||||
|
|
||||||
if step8_output:
|
if step10_output:
|
||||||
# 若为相对路径,使用 work_dir 合成为绝对路径
|
# 若为相对路径,使用 work_dir 合成为绝对路径
|
||||||
if not os.path.isabs(step8_output):
|
if not os.path.isabs(step10_output):
|
||||||
step8_output = os.path.join(self.work_dir or '', step8_output).replace('\\', '/')
|
step10_output = os.path.join(self.work_dir or '', step10_output).replace('\\', '/')
|
||||||
# 提取父目录后追加 Machine_Learning_Prediction(最底层真实子目录)
|
# 提取父目录后追加 Machine_Learning_Prediction(最底层真实子目录)
|
||||||
base_pred_dir = str(Path(step8_output).parent)
|
base_pred_dir = str(Path(step10_output).parent)
|
||||||
ml_pred_dir = Path(base_pred_dir) / "Machine_Learning_Prediction"
|
ml_pred_dir = Path(base_pred_dir) / "Machine_Learning_Prediction"
|
||||||
pred_dir = str(ml_pred_dir) if ml_pred_dir.exists() else base_pred_dir
|
pred_dir = str(ml_pred_dir) if ml_pred_dir.exists() else base_pred_dir
|
||||||
|
|
||||||
@ -594,13 +594,13 @@ class Step11MapPanel(QWidget):
|
|||||||
print("⚠️ 提示:专题图生成模块需传入标准矢量边界文件 (.shp),请手动选择。")
|
print("⚠️ 提示:专题图生成模块需传入标准矢量边界文件 (.shp),请手动选择。")
|
||||||
|
|
||||||
# 6. 自动探测 Step 8 输出的水色指数 GeoTIFF(GeoTIFF 渲染模式)
|
# 6. 自动探测 Step 10 输出的水色指数 GeoTIFF(GeoTIFF 渲染模式)
|
||||||
step8_out_dir = Path(self.work_dir) / "8_WaterIndex_Images" if self.work_dir else None
|
step10_out_dir = Path(self.work_dir) / "10_WaterIndex_Images" if self.work_dir else None
|
||||||
if step8_out_dir and step8_out_dir.is_dir():
|
if step10_out_dir and step10_out_dir.is_dir():
|
||||||
# GeoTIFF 批量模式:填充目录供批量渲染
|
# GeoTIFF 批量模式:填充目录供批量渲染
|
||||||
if not (self.geotiff_dir_edit.text() or "").strip():
|
if not (self.geotiff_dir_edit.text() or "").strip():
|
||||||
self.geotiff_dir_edit.setText(str(step8_out_dir))
|
self.geotiff_dir_edit.setText(str(step10_out_dir))
|
||||||
# GeoTIFF 单文件模式:默认选中第一个
|
# GeoTIFF 单文件模式:默认选中第一个
|
||||||
tif_files = sorted(step8_out_dir.glob("*.tif"))
|
tif_files = sorted(step10_out_dir.glob("*.tif"))
|
||||||
if tif_files and not (self.geotiff_file.get_path() or "").strip():
|
if tif_files and not (self.geotiff_file.get_path() or "").strip():
|
||||||
self.geotiff_file.set_path(str(tif_files[0]))
|
self.geotiff_file.set_path(str(tif_files[0]))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@ -253,7 +253,7 @@ class Step14Panel(QWidget):
|
|||||||
self.geotiff_dir_label = QLabel("水色指数目录:")
|
self.geotiff_dir_label = QLabel("水色指数目录:")
|
||||||
self.geotiff_dir_label.setMinimumWidth(120)
|
self.geotiff_dir_label.setMinimumWidth(120)
|
||||||
self.geotiff_dir_edit = QLineEdit()
|
self.geotiff_dir_edit = QLineEdit()
|
||||||
self.geotiff_dir_edit.setPlaceholderText("选择 8_WaterIndex_Images 文件夹(批量渲染)…")
|
self.geotiff_dir_edit.setPlaceholderText("选择 10_WaterIndex_Images 文件夹(批量渲染)…")
|
||||||
geotiff_dir_btn = QPushButton("浏览…")
|
geotiff_dir_btn = QPushButton("浏览…")
|
||||||
geotiff_dir_btn.setMaximumWidth(80)
|
geotiff_dir_btn.setMaximumWidth(80)
|
||||||
geotiff_dir_btn.clicked.connect(self.browse_geotiff_dir)
|
geotiff_dir_btn.clicked.connect(self.browse_geotiff_dir)
|
||||||
@ -392,7 +392,7 @@ class Step14Panel(QWidget):
|
|||||||
"""浏览 GeoTIFF 文件夹(批量模式)"""
|
"""浏览 GeoTIFF 文件夹(批量模式)"""
|
||||||
default = self._get_default_work_dir()
|
default = self._get_default_work_dir()
|
||||||
if default:
|
if default:
|
||||||
default = os.path.join(default, "8_WaterIndex_Images")
|
default = os.path.join(default, "10_WaterIndex_Images")
|
||||||
d = QFileDialog.getExistingDirectory(
|
d = QFileDialog.getExistingDirectory(
|
||||||
self, "选择水色指数 GeoTIFF 文件夹", default
|
self, "选择水色指数 GeoTIFF 文件夹", default
|
||||||
)
|
)
|
||||||
@ -514,18 +514,18 @@ class Step14Panel(QWidget):
|
|||||||
pred_dir = None
|
pred_dir = None
|
||||||
if hasattr(main_window, 'step11_prediction_panel'):
|
if hasattr(main_window, 'step11_prediction_panel'):
|
||||||
step8_widget = getattr(main_window.step11_prediction_panel, 'output_file', None)
|
step8_widget = getattr(main_window.step11_prediction_panel, 'output_file', None)
|
||||||
step8_output = ""
|
step10_output = ""
|
||||||
if hasattr(step8_widget, 'get_path'):
|
if hasattr(step8_widget, 'get_path'):
|
||||||
step8_output = step8_widget.get_path() or ""
|
step10_output = step8_widget.get_path() or ""
|
||||||
elif hasattr(step8_widget, 'text'):
|
elif hasattr(step8_widget, 'text'):
|
||||||
step8_output = step8_widget.text() or ""
|
step10_output = step8_widget.text() or ""
|
||||||
|
|
||||||
if step8_output:
|
if step10_output:
|
||||||
# 若为相对路径,使用 work_dir 合成为绝对路径
|
# 若为相对路径,使用 work_dir 合成为绝对路径
|
||||||
if not os.path.isabs(step8_output):
|
if not os.path.isabs(step10_output):
|
||||||
step8_output = os.path.join(self.work_dir or '', step8_output).replace('\\', '/')
|
step10_output = os.path.join(self.work_dir or '', step10_output).replace('\\', '/')
|
||||||
# 提取父目录后追加 Machine_Learning_Prediction(最底层真实子目录)
|
# 提取父目录后追加 Machine_Learning_Prediction(最底层真实子目录)
|
||||||
base_pred_dir = str(Path(step8_output).parent)
|
base_pred_dir = str(Path(step10_output).parent)
|
||||||
ml_pred_dir = Path(base_pred_dir) / "Machine_Learning_Prediction"
|
ml_pred_dir = Path(base_pred_dir) / "Machine_Learning_Prediction"
|
||||||
pred_dir = str(ml_pred_dir) if ml_pred_dir.exists() else base_pred_dir
|
pred_dir = str(ml_pred_dir) if ml_pred_dir.exists() else base_pred_dir
|
||||||
|
|
||||||
@ -594,13 +594,13 @@ class Step14Panel(QWidget):
|
|||||||
print("⚠️ 提示:专题图生成模块需传入标准矢量边界文件 (.shp),请手动选择。")
|
print("⚠️ 提示:专题图生成模块需传入标准矢量边界文件 (.shp),请手动选择。")
|
||||||
|
|
||||||
# 6. 自动探测 Step 8 输出的水色指数 GeoTIFF(GeoTIFF 渲染模式)
|
# 6. 自动探测 Step 10 输出的水色指数 GeoTIFF(GeoTIFF 渲染模式)
|
||||||
step8_out_dir = Path(self.work_dir) / "8_WaterIndex_Images" if self.work_dir else None
|
step10_out_dir = Path(self.work_dir) / "10_WaterIndex_Images" if self.work_dir else None
|
||||||
if step8_out_dir and step8_out_dir.is_dir():
|
if step10_out_dir and step10_out_dir.is_dir():
|
||||||
# GeoTIFF 批量模式:填充目录供批量渲染
|
# GeoTIFF 批量模式:填充目录供批量渲染
|
||||||
if not (self.geotiff_dir_edit.text() or "").strip():
|
if not (self.geotiff_dir_edit.text() or "").strip():
|
||||||
self.geotiff_dir_edit.setText(str(step8_out_dir))
|
self.geotiff_dir_edit.setText(str(step10_out_dir))
|
||||||
# GeoTIFF 单文件模式:默认选中第一个
|
# GeoTIFF 单文件模式:默认选中第一个
|
||||||
tif_files = sorted(step8_out_dir.glob("*.tif"))
|
tif_files = sorted(step10_out_dir.glob("*.tif"))
|
||||||
if tif_files and not (self.geotiff_file.get_path() or "").strip():
|
if tif_files and not (self.geotiff_file.get_path() or "").strip():
|
||||||
self.geotiff_file.set_path(str(tif_files[0]))
|
self.geotiff_file.set_path(str(tif_files[0]))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@ -1378,7 +1378,8 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
'step11_map': "14_visualization/"
|
'step11_map': "14_visualization/"
|
||||||
}
|
}
|
||||||
|
|
||||||
# 定义步骤间的依赖关系:{当前步骤: {输入字段: (依赖步骤, 输出类型, 面板属性名)}}
|
# 依赖关系字典结构:
|
||||||
|
# '当前步骤ID': { '依赖参数名': ('上游步骤ID', '上游输出类型/Key', '当前步骤接收该路径的组件属性名') }
|
||||||
self.step_dependencies = {
|
self.step_dependencies = {
|
||||||
'step2': {
|
'step2': {
|
||||||
'img_path': ('step1', 'reference_img', 'img_file'),
|
'img_path': ('step1', 'reference_img', 'img_file'),
|
||||||
@ -1386,31 +1387,36 @@ class WaterQualityGUI(QMainWindow):
|
|||||||
},
|
},
|
||||||
'step3': {
|
'step3': {
|
||||||
'img_path': ('step1', 'reference_img', 'img_file'),
|
'img_path': ('step1', 'reference_img', 'img_file'),
|
||||||
'water_mask': ('step1', 'water_mask', 'water_mask_file'),
|
'water_mask': ('step1', 'water_mask', 'water_mask_file')
|
||||||
|
},
|
||||||
|
'step4_sampling': {
|
||||||
|
'deglint_img_path': ('step3', 'deglint_image', 'deglint_img_file'),
|
||||||
|
'water_mask_path': ('step1', 'water_mask', 'water_mask_file')
|
||||||
|
},
|
||||||
|
'step5_clean': {
|
||||||
|
'csv_path': ('step4_sampling', 'sampling_spectra', 'csv_file') # step5 寻找 step4 的采样点
|
||||||
},
|
},
|
||||||
'step6_feature': {
|
'step6_feature': {
|
||||||
'deglint_img_path': ('step3', 'deglint_image', 'deglint_img_file'),
|
'deglint_img_path': ('step3', 'deglint_image', 'deglint_img_file'),
|
||||||
'csv_path': ('step5_clean', 'processed_data', 'csv_file'),
|
'csv_path': ('step5_clean', 'processed_data', 'csv_file'),
|
||||||
'boundary_mask_path': ('step1', 'water_mask', 'boundary_mask_file'),
|
'boundary_mask_path': ('step1', 'water_mask', 'water_mask_file'), # step6_panel里叫water_mask_file
|
||||||
'glint_mask_path': ('step2', 'glint_mask', 'glint_mask_file')
|
'glint_mask_path': ('step2', 'glint_mask', 'glint_mask_file')
|
||||||
},
|
},
|
||||||
'step7_index': {
|
'step7_index': {
|
||||||
'training_csv_path': ('step6_feature', 'training_spectra', 'output_file')
|
'training_csv_path': ('step6_feature', 'training_spectra', 'training_data_widget') # step7 找 step6 的光谱提取
|
||||||
},
|
},
|
||||||
'step8_ml_train': {
|
'step8_ml_train': {
|
||||||
'training_csv_path': ('step7_index', 'water_indices', 'csv_file')
|
'training_csv_file': ('step7_index', 'training_spectra_indices', 'training_csv_file') # step8 找 step7 的指数宽表
|
||||||
},
|
|
||||||
'step4_sampling': {
|
|
||||||
'deglint_img_path': ('step3', 'deglint_image', 'deglint_img_file'),
|
|
||||||
'water_mask_path': ('step1', 'water_mask', 'water_mask_file'),
|
|
||||||
'glint_mask_path': ('step2', 'glint_mask', 'glint_mask_file')
|
|
||||||
},
|
},
|
||||||
'step9_ml_predict': {
|
'step9_ml_predict': {
|
||||||
'sampling_csv_path': ('step4_sampling', 'sampling_points', 'sampling_csv_file'),
|
'models_dir': ('step8_ml_train', 'Supervised_Model_Training', 'models_dir_widget')
|
||||||
'models_dir': ('step8_ml_train', 'models', 'models_dir_file')
|
},
|
||||||
|
'step10_watercolor': {
|
||||||
|
'bsq_file': ('step3', 'deglint_image', 'bsq_file') # 水色反演需要去耀斑BSQ影像
|
||||||
},
|
},
|
||||||
'step11_map': {
|
'step11_map': {
|
||||||
'prediction_csv_path': ('step9_ml_predict', 'predictions', 'prediction_csv_file')
|
'prediction_csv_dir_edit': ('step9_ml_predict', 'Machine_Learning_Prediction', 'prediction_csv_dir_edit'),
|
||||||
|
'geotiff_dir_edit': ('step10_watercolor', 'WaterIndex_Images', 'geotiff_dir_edit')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -4,6 +4,7 @@ if not hasattr(threading.Thread, "isAlive"):
|
|||||||
|
|
||||||
import warnings
|
import warnings
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from scipy import stats
|
from scipy import stats
|
||||||
@ -11,6 +12,54 @@ from scipy import stats
|
|||||||
warnings.filterwarnings("ignore")
|
warnings.filterwarnings("ignore")
|
||||||
|
|
||||||
|
|
||||||
|
def auto_detect_coord_columns(df: pd.DataFrame):
    """Identify the X/longitude and Y/latitude columns of a table.

    Detection runs in three stages and stops at the first one that yields
    a pair:

    1. Column-name matching: each column name is lower-cased, stripped and
       tested against the longitude / latitude regex lists below. The first
       matching column of each kind wins.
    2. Numeric-range inference on the first two numeric columns, using the
       mean of their first 10 rows: a mean with ``abs(mean) <= 90`` paired
       with one ``> 90`` is taken as (latitude, longitude).
    3. Positional fallback: the first two columns of the table.

    Parameters
    ----------
    df : pd.DataFrame
        Table whose columns are inspected.

    Returns
    -------
    tuple
        ``(x_col, y_col)`` column labels. ``(None, None)`` is returned when
        the table has fewer than two columns and no pair could be found, so
        callers that guard with ``col in df.columns`` simply skip the
        coordinate handling instead of crashing.
    """
    lon_patterns = [
        r'^lon', r'^lng', r'^longitude', r'经度', r'^x$', r'^utm_x$', r'^pixel_x$'
    ]
    lat_patterns = [
        r'^lat', r'^latitude', r'纬度', r'^y$', r'^utm_y$', r'^pixel_y$'
    ]

    x_col, y_col = None, None

    # Stage 1: match by column name.
    for col in df.columns:
        col_str = str(col).lower().strip()
        if x_col is None and any(re.search(p, col_str) for p in lon_patterns):
            x_col = col
        # `elif` so that a single column whose name matches both pattern
        # lists is never reported as both X and Y.
        elif y_col is None and any(re.search(p, col_str) for p in lat_patterns):
            y_col = col

    # Compare against None explicitly: a falsy label (e.g. 0 or "") is still
    # a valid column label.
    if x_col is not None and y_col is not None:
        return x_col, y_col

    # Stage 2: infer from value ranges of the first two numeric columns.
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()

    if len(numeric_cols) >= 2:
        col1, col2 = numeric_cols[0], numeric_cols[1]
        mean1 = df[col1].head(10).mean()
        mean2 = df[col2].head(10).mean()

        if abs(mean1) <= 90 and abs(mean2) > 90:
            # Only col1 fits the latitude range.
            y_col, x_col = col1, col2
        elif abs(mean2) <= 90 and abs(mean1) > 90:
            # Only col2 fits the latitude range.
            x_col, y_col = col1, col2
        else:
            # Ambiguous (both within or both outside +-90): the column with
            # the larger mean is taken as X.
            # NOTE(review): this is a guess; for projected coordinates the
            # northing is often the larger value - confirm with real data.
            if mean1 > mean2:
                x_col, y_col = col1, col2
            else:
                x_col, y_col = col2, col1

        print(f"💡 触发智能数值推断坐标列: X/经度->[{x_col}], Y/纬度->[{y_col}]")
        return x_col, y_col

    # Stage 3: positional fallback. Guard the indexing so that a table with
    # fewer than two columns does not raise IndexError.
    if len(df.columns) < 2:
        return None, None
    return df.columns[0], df.columns[1]
|
||||||
|
|
||||||
|
|
||||||
def detect_outliers_iqr(data: pd.DataFrame, column: str) -> pd.Series:
|
def detect_outliers_iqr(data: pd.DataFrame, column: str) -> pd.Series:
|
||||||
"""使用 IQR 方法检测异常值,返回与 data 同索引的布尔序列"""
|
"""使用 IQR 方法检测异常值,返回与 data 同索引的布尔序列"""
|
||||||
s = pd.to_numeric(data[column], errors="coerce")
|
s = pd.to_numeric(data[column], errors="coerce")
|
||||||
@ -92,11 +141,15 @@ def process_water_quality_data(input_file: str, output_file: str):
|
|||||||
print(f"原始数据形状: {df.shape}")
|
print(f"原始数据形状: {df.shape}")
|
||||||
print(f"列名: {list(df.columns)}")
|
print(f"列名: {list(df.columns)}")
|
||||||
|
|
||||||
|
# 0.5) 智能检测坐标列
|
||||||
|
x_col, y_col = auto_detect_coord_columns(df)
|
||||||
|
print(f"坐标列检测结果: X/经度=[{x_col}], Y/纬度=[{y_col}]")
|
||||||
|
|
||||||
# 1) 经纬度精度筛选(小数位 >= 7)
|
# 1) 经纬度精度筛选(小数位 >= 7)
|
||||||
print("\n正在筛选经纬度精度(小数位>=7)...")
|
print("\n正在筛选经纬度精度(小数位>=7)...")
|
||||||
initial_count = len(df)
|
initial_count = len(df)
|
||||||
|
|
||||||
for col in ["经度", "纬度"]:
|
for col in [y_col, x_col]:
|
||||||
if col in df.columns:
|
if col in df.columns:
|
||||||
dec_len = df[col].apply(_decimal_len)
|
dec_len = df[col].apply(_decimal_len)
|
||||||
keep_mask = dec_len >= 7
|
keep_mask = dec_len >= 7
|
||||||
@ -109,13 +162,11 @@ def process_water_quality_data(input_file: str, output_file: str):
|
|||||||
|
|
||||||
# 2) 异常值检测(IQR)- 只删除异常值,不删除整行
|
# 2) 异常值检测(IQR)- 只删除异常值,不删除整行
|
||||||
print("\n正在检测异常值(IQR)...")
|
print("\n正在检测异常值(IQR)...")
|
||||||
# 数值列
|
|
||||||
numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
|
numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
|
||||||
|
|
||||||
# 排除不检测的列
|
exclude_columns = {"时间", "测量点", y_col, x_col}
|
||||||
exclude_columns = ["时间", "测量点", "纬度", "经度"]
|
|
||||||
if "原始" in df.columns:
|
if "原始" in df.columns:
|
||||||
exclude_columns.append("原始")
|
exclude_columns.add("原始")
|
||||||
|
|
||||||
columns_to_check = [c for c in numeric_columns if c not in exclude_columns]
|
columns_to_check = [c for c in numeric_columns if c not in exclude_columns]
|
||||||
print(f"将检测以下列的异常值: {columns_to_check}")
|
print(f"将检测以下列的异常值: {columns_to_check}")
|
||||||
@ -128,7 +179,6 @@ def process_water_quality_data(input_file: str, output_file: str):
|
|||||||
col_mask = detect_outliers_iqr(df_clean, column)
|
col_mask = detect_outliers_iqr(df_clean, column)
|
||||||
outlier_count = int(col_mask.sum())
|
outlier_count = int(col_mask.sum())
|
||||||
print(f'列 "{column}" 检测到 {outlier_count} 个异常值,将其设为 NaN')
|
print(f'列 "{column}" 检测到 {outlier_count} 个异常值,将其设为 NaN')
|
||||||
# 只将异常值设为 NaN,不删除整行
|
|
||||||
df_clean.loc[col_mask, column] = np.nan
|
df_clean.loc[col_mask, column] = np.nan
|
||||||
total_outliers_removed += outlier_count
|
total_outliers_removed += outlier_count
|
||||||
|
|
||||||
@ -140,7 +190,7 @@ def process_water_quality_data(input_file: str, output_file: str):
|
|||||||
df_clean = df_clean.drop(columns=["原始"])
|
df_clean = df_clean.drop(columns=["原始"])
|
||||||
print('已去除 "原始" 列')
|
print('已去除 "原始" 列')
|
||||||
|
|
||||||
# 4) 字段类型处理:尽量把“时间”转为 datetime
|
# 4) 字段类型处理:尽量把"时间"转为 datetime
|
||||||
if "时间" in df_clean.columns:
|
if "时间" in df_clean.columns:
|
||||||
try:
|
try:
|
||||||
df_clean["时间"] = pd.to_datetime(df_clean["时间"], errors="coerce")
|
df_clean["时间"] = pd.to_datetime(df_clean["时间"], errors="coerce")
|
||||||
@ -153,22 +203,18 @@ def process_water_quality_data(input_file: str, output_file: str):
|
|||||||
print('错误:未找到 "测量点" 列')
|
print('错误:未找到 "测量点" 列')
|
||||||
return
|
return
|
||||||
|
|
||||||
# 构建聚合字典
|
|
||||||
agg_dict = {}
|
agg_dict = {}
|
||||||
if "时间" in df_clean.columns and np.issubdtype(df_clean["时间"].dtype, np.datetime64):
|
if "时间" in df_clean.columns and np.issubdtype(df_clean["时间"].dtype, np.datetime64):
|
||||||
# 时间取平均(等价于时间戳平均)
|
|
||||||
agg_dict["时间"] = "mean"
|
agg_dict["时间"] = "mean"
|
||||||
elif "时间" in df_clean.columns:
|
elif "时间" in df_clean.columns:
|
||||||
# 如果不是时间类型,保留最常见值以避免无意义的字符串平均
|
|
||||||
agg_dict["时间"] = lambda s: s.mode().iloc[0] if not s.mode().empty else s.dropna().iloc[0] if s.dropna().size else np.nan
|
agg_dict["时间"] = lambda s: s.mode().iloc[0] if not s.mode().empty else s.dropna().iloc[0] if s.dropna().size else np.nan
|
||||||
|
|
||||||
for col in ["纬度", "经度"]:
|
for col in [y_col, x_col]:
|
||||||
if col in df_clean.columns:
|
if col in df_clean.columns:
|
||||||
agg_dict[col] = "mean"
|
agg_dict[col] = "mean"
|
||||||
|
|
||||||
# 其余数值列取均值
|
|
||||||
for col in df_clean.select_dtypes(include=[np.number]).columns:
|
for col in df_clean.select_dtypes(include=[np.number]).columns:
|
||||||
if col not in ["纬度", "经度"]:
|
if col not in {y_col, x_col}:
|
||||||
agg_dict[col] = "mean"
|
agg_dict[col] = "mean"
|
||||||
|
|
||||||
grouped = df_clean.groupby("测量点", as_index=False).agg(agg_dict)
|
grouped = df_clean.groupby("测量点", as_index=False).agg(agg_dict)
|
||||||
|
|||||||
@ -204,7 +204,7 @@ class Step11WaterColorPanel(QWidget):
|
|||||||
"输出目录:",
|
"输出目录:",
|
||||||
"Directories"
|
"Directories"
|
||||||
)
|
)
|
||||||
self.output_dir.line_edit.setPlaceholderText("留空 → 工作目录/8_WaterIndex_Images")
|
self.output_dir.line_edit.setPlaceholderText("留空 → 工作目录/10_WaterIndex_Images")
|
||||||
self.output_dir.browse_btn.clicked.disconnect()
|
self.output_dir.browse_btn.clicked.disconnect()
|
||||||
self.output_dir.browse_btn.clicked.connect(self._browse_output_dir)
|
self.output_dir.browse_btn.clicked.connect(self._browse_output_dir)
|
||||||
output_layout.addRow("输出目录:", self.output_dir)
|
output_layout.addRow("输出目录:", self.output_dir)
|
||||||
@ -452,7 +452,7 @@ class Step11WaterColorPanel(QWidget):
|
|||||||
|
|
||||||
# 自动填入输出目录
|
# 自动填入输出目录
|
||||||
if self.work_dir:
|
if self.work_dir:
|
||||||
out_dir = os.path.join(self.work_dir, "8_WaterIndex_Images").replace('\\', '/')
|
out_dir = os.path.join(self.work_dir, "10_WaterIndex_Images").replace('\\', '/')
|
||||||
os.makedirs(out_dir, exist_ok=True)
|
os.makedirs(out_dir, exist_ok=True)
|
||||||
if not self.output_dir.get_path():
|
if not self.output_dir.get_path():
|
||||||
self.output_dir.set_path(out_dir)
|
self.output_dir.set_path(out_dir)
|
||||||
@ -483,7 +483,7 @@ class Step11WaterColorPanel(QWidget):
|
|||||||
return
|
return
|
||||||
if not output_dir:
|
if not output_dir:
|
||||||
work_dir = self._get_default_work_dir()
|
work_dir = self._get_default_work_dir()
|
||||||
output_dir = os.path.join(work_dir, "8_WaterIndex_Images").replace('\\', '/')
|
output_dir = os.path.join(work_dir, "10_WaterIndex_Images").replace('\\', '/')
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
self.output_dir.set_path(output_dir)
|
self.output_dir.set_path(output_dir)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user