结构修改,后端文件跟前端内容进行适配

This commit is contained in:
DXC
2026-06-11 17:44:24 +08:00
parent 3584c07b67
commit e59703f163
12 changed files with 1311 additions and 495 deletions

View File

@ -5,345 +5,411 @@ Step8 面板 - 机器学习建模
"""
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from PyQt5.QtWidgets import (
QWidget, QVBoxLayout, QGroupBox, QGridLayout,
QHBoxLayout, QLabel, QCheckBox, QPushButton, QMessageBox,
QScrollArea, QListWidget, QListWidgetItem, QAbstractItemView,
QRadioButton, QButtonGroup
QWidget, QVBoxLayout, QGroupBox, QFormLayout, QGridLayout,
QHBoxLayout, QLabel, QLineEdit, QSpinBox, QCheckBox,
QPushButton, QFileDialog, QMessageBox,
)
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QColor, QBrush, QFont
from src.gui.components.custom_widgets import FileSelectWidget
from src.gui.styles import ModernStylesheet
def get_resource_path(relative_path: str) -> str:
"""适配开发与 PyInstaller 环境的路径获取逻辑。"""
if hasattr(sys, '_MEIPASS'):
internal = os.path.join(sys._MEIPASS, '_internal', relative_path)
if os.path.exists(internal):
return internal
return os.path.join(sys._MEIPASS, relative_path)
# ============================================================
# 中文映射表(内部键名 -> 显示文本)
# ============================================================
exe_dir = os.path.dirname(sys.executable)
internal = os.path.join(exe_dir, '_internal', relative_path)
if os.path.exists(internal):
return internal
# 预处理方法:内部键 -> 显示文本
PREPROC_CHINESE = {
'None': '无 (None)',
'MMS': '最小-最大归一化 (MMS)',
'SS': '标度化 (SS)',
'SNV': '标准正态变换 (SNV)',
'MA': '移动平均 (MA)',
'SG': 'Savitzky-Golay (SG)',
'MSC': '多元散射校正 (MSC)',
'D1': '一阶导数 (D1)',
'D2': '二阶导数 (D2)',
'DT': '去趋势 (DT)',
'CT': '中心化 (CT)',
}
base_dir = Path(__file__).resolve().parent.parent / "model"
return str(base_dir / os.path.basename(relative_path))
# 模型类型:内部键 -> 显示文本
MODEL_CHINESE = {
# 线性模型
'LinearRegression': '多元线性回归 (MLR)',
'Ridge': '岭回归 (Ridge)',
'Lasso': '套索回归 (Lasso)',
'ElasticNet': '弹性网络 (ElasticNet)',
'PLS': '偏最小二乘 (PLSR)',
# 树模型
'DecisionTree': '决策树 (CART)',
'RF': '随机森林 (RF)',
'ExtraTrees': '极端随机树 (ET)',
'XGBoost': '极值梯度提升 (XGBoost)',
'LightGBM': '轻量梯度提升 (LightGBM)',
'CatBoost': '类别梯度提升 (CatBoost)',
# 集成学习
'GradientBoosting': '梯度提升树 (GBDT)',
'AdaBoost': '自适应提升 (AdaBoost)',
# 其他模型
'SVR': '支持向量回归 (SVR)',
'KNN': 'K近邻回归 (KNN)',
'MLP': '多层感知机 (BP神经网络)',
}
# 数据划分方法:内部键 -> 显示文本
SPLIT_CHINESE = {
'spxy': 'SPXY 算法 (考量X-Y空间)',
'ks': 'KS 算法 (考量X空间)',
'random': '随机划分 (Random)',
}
class Step8MlTrainPanel(QWidget):
"""步骤8机器学习建模"""
COLOR_RATIO = QColor(255, 255, 255)
COLOR_CONCENTRATION = QColor(220, 240, 255)
COLOR_HEADER = QColor(245, 245, 245)
def __init__(self, parent=None):
super().__init__(parent)
self.index_checkboxes: Dict[str, QListWidgetItem] = {}
self.work_dir: Optional[str] = None
self.builtin_formula_path = get_resource_path("waterindex.csv")
self._formula_type_map: Dict[str, str] = {}
self._formula_color_map: Dict[str, QColor] = {}
self._formula_coef_map: Dict[str, List[float]] = {}
self.init_ui()
self._auto_load_formulas()
def init_ui(self):
main_layout = QVBoxLayout()
main_layout.setContentsMargins(20, 20, 20, 20)
main_layout.setSpacing(10)
layout = QVBoxLayout()
# 1. 公式配置源 (只读)
path_group = QGroupBox("公式配置源 (内置)")
path_layout = QVBoxLayout()
self.formula_csv_widget = FileSelectWidget("内置CSV路径:", "CSV Files (*.csv)")
self.formula_csv_widget.set_path(self.builtin_formula_path)
self.formula_csv_widget.set_read_only(True)
self.formula_csv_widget.line_edit.setStyleSheet("background-color: #f0f0f0; color: #666;")
path_layout.addWidget(self.formula_csv_widget)
path_group.setLayout(path_layout)
main_layout.addWidget(path_group)
# 标题
# 2. 训练数据输入
input_group = QGroupBox("输入样本数据")
input_layout = QVBoxLayout()
self.training_data_widget = FileSelectWidget("特征提取CSV:", "CSV Files (*.csv)")
input_layout.addWidget(self.training_data_widget)
input_group.setLayout(input_layout)
main_layout.addWidget(input_group)
# 3. 公式选择区 (分组 ListWidget)
self.formula_group = QGroupBox("待计算水质指数勾选")
formula_outer_layout = QVBoxLayout()
# 训练数据文件(用于独立运行)
self.training_csv_file = FileSelectWidget(
"训练数据:",
"CSV Files (*.csv);;All Files (*.*)"
)
layout.addWidget(self.training_csv_file)
btn_layout = QHBoxLayout()
self.select_all_btn = QPushButton("全选")
self.deselect_all_btn = QPushButton("清空")
self.select_ratio_btn = QPushButton("仅选比值型")
self.select_conc_btn = QPushButton("仅选浓度型")
self.select_all_btn.clicked.connect(self.select_all_formulas)
self.deselect_all_btn.clicked.connect(self.deselect_all_formulas)
self.select_ratio_btn.clicked.connect(self._select_ratio_only)
self.select_conc_btn.clicked.connect(self._select_conc_only)
btn_layout.addWidget(self.select_all_btn)
btn_layout.addWidget(self.deselect_all_btn)
btn_layout.addWidget(self.select_ratio_btn)
btn_layout.addWidget(self.select_conc_btn)
btn_layout.addStretch()
# 机器学习模型页面
self.ml_page = QWidget()
self.create_ml_page()
layout.addWidget(self.ml_page)
self.refresh_button = QPushButton("重新加载")
self.refresh_button.clicked.connect(lambda: self.refresh_formulas(silent=False))
btn_layout.addWidget(self.refresh_button)
# 输出文件路径
self.output_path = FileSelectWidget(
"输出文件:",
"CSV Files (*.csv);;All Files (*.*)",
mode="save"
)
self.output_path.line_edit.setPlaceholderText("自动生成,或手动指定输出文件路径...")
self.output_path.browse_btn.clicked.disconnect()
self.output_path.browse_btn.clicked.connect(self.browse_output_path)
layout.addWidget(self.output_path)
formula_outer_layout.addLayout(btn_layout)
# 启用步骤
self.enable_checkbox = QCheckBox("启用此步骤")
self.enable_checkbox.setChecked(False)
layout.addWidget(self.enable_checkbox)
scroll = QScrollArea()
scroll.setWidgetResizable(True)
scroll.setMinimumHeight(280)
self.scroll_content = QWidget()
self.formula_layout = QVBoxLayout(self.scroll_content)
self.formula_layout.setContentsMargins(4, 4, 4, 4)
self.formula_layout.setSpacing(2)
self.formula_layout.setAlignment(Qt.AlignTop)
# 独立运行按钮
self.run_btn = QPushButton("独立运行此步骤")
self.run_btn.setStyleSheet(ModernStylesheet.get_button_stylesheet('success'))
self.run_btn.clicked.connect(self.run_step)
layout.addWidget(self.run_btn)
self.formula_list = QListWidget()
self.formula_list.setSelectionMode(QAbstractItemView.MultiSelection)
self.formula_list.itemChanged.connect(self._on_item_changed)
self.formula_layout.addWidget(self.formula_list)
layout.addStretch()
self.setLayout(layout)
scroll.setWidget(self.scroll_content)
formula_outer_layout.addWidget(scroll)
def create_ml_page(self):
"""创建机器学习模型页面"""
layout = QVBoxLayout()
self.formula_group.setLayout(formula_outer_layout)
main_layout.addWidget(self.formula_group)
# 参数设置
params_group = QGroupBox("训练参数")
params_layout = QFormLayout()
# 4. 输出选项
output_group = QGroupBox("输出模式")
output_layout = QVBoxLayout()
self.feature_start = QLineEdit()
self.feature_start.setText("374.285004")
params_layout.addRow("特征起始列:", self.feature_start)
mode_layout = QHBoxLayout()
self.mode_group = QButtonGroup()
self.radio_both = QRadioButton("两者皆出")
self.radio_wide = QRadioButton("仅宽表")
self.radio_single = QRadioButton("仅单文件")
self.mode_group.addButton(self.radio_both, 0)
self.mode_group.addButton(self.radio_wide, 1)
self.mode_group.addButton(self.radio_single, 2)
self.radio_both.setChecked(True)
mode_layout.addWidget(self.radio_both)
mode_layout.addWidget(self.radio_wide)
mode_layout.addWidget(self.radio_single)
mode_layout.addStretch()
output_layout.addLayout(mode_layout)
self.cv_folds = QSpinBox()
self.cv_folds.setRange(2, 10)
self.cv_folds.setValue(3)
params_layout.addRow("交叉验证折数:", self.cv_folds)
self.enable_checkbox = QCheckBox("启用计算流程")
self.enable_checkbox.setChecked(True)
output_layout.addWidget(self.enable_checkbox)
params_group.setLayout(params_layout)
layout.addWidget(params_group)
output_group.setLayout(output_layout)
main_layout.addWidget(output_group)
# 预处理方法 - 多选
preproc_group = QGroupBox("预处理方法 (可多选)")
preproc_layout = QVBoxLayout()
# 5. 运行按钮
self.run_button = QPushButton("立即执行计算")
self.run_button.setStyleSheet(ModernStylesheet.get_button_stylesheet('success'))
self.run_button.setMinimumHeight(40)
self.run_button.clicked.connect(self.run_step)
main_layout.addWidget(self.run_button)
preproc_grid = QGridLayout()
self.preproc_checkboxes = {}
preproc_methods = ['None', 'MMS', 'SS', 'SNV', 'MA', 'SG', 'MSC', 'D1', 'D2', 'DT', 'CT']
self.setLayout(main_layout)
for i, method in enumerate(preproc_methods):
checkbox = QCheckBox(PREPROC_CHINESE.get(method, method))
checkbox.setChecked(False)
self.preproc_checkboxes[method] = checkbox
preproc_grid.addWidget(checkbox, i // 4, i % 4)
def _on_item_changed(self, item: QListWidgetItem):
if item.checkState() == Qt.Checked:
bg_color = self.COLOR_RATIO
for name, ref_item in self.index_checkboxes.items():
if ref_item is item:
bg_color = self._formula_color_map.get(name, self.COLOR_RATIO)
break
item.setBackground(QBrush(bg_color))
else:
item.setBackground(QBrush(self.COLOR_RATIO))
button_layout = QHBoxLayout()
select_all_btn = QPushButton("全选")
deselect_all_btn = QPushButton("全不选")
select_all_btn.clicked.connect(lambda: self._toggle_checkboxes(self.preproc_checkboxes, True))
deselect_all_btn.clicked.connect(lambda: self._toggle_checkboxes(self.preproc_checkboxes, False))
button_layout.addWidget(select_all_btn)
button_layout.addWidget(deselect_all_btn)
button_layout.addStretch()
def _auto_load_formulas(self):
if os.path.exists(self.builtin_formula_path):
self.refresh_formulas(silent=True)
else:
print(f"DEBUG: 自动加载失败,路径不存在: {self.builtin_formula_path}")
preproc_layout.addLayout(preproc_grid)
preproc_layout.addLayout(button_layout)
preproc_group.setLayout(preproc_layout)
layout.addWidget(preproc_group)
def refresh_formulas(self, silent=False):
path = self.builtin_formula_path
if not os.path.exists(path):
if not silent:
QMessageBox.warning(self, "错误", f"找不到内置公式文件:\n{path}")
return
# 模型选择 - 多选
model_group = QGroupBox("模型类型 (可多选)")
model_layout = QVBoxLayout()
try:
df = None
for enc in ('utf-8', 'gbk', 'utf-8-sig'):
try:
df = pd.read_csv(path, encoding=enc)
if 'Formula_Name' in df.columns:
break
except Exception:
continue
model_grid = QGridLayout()
self.model_checkboxes = {}
if df is None or 'Formula_Name' not in df.columns:
if not silent:
QMessageBox.critical(self, "错误", "CSV缺少 'Formula_Name'")
return
self._formula_type_map.clear()
self._formula_coef_map.clear()
for _, row in df.iterrows():
name = str(row['Formula_Name']).strip()
if not name:
continue
ftype = str(row.get('Formula_Type', 'ratio')).strip().lower()
self._formula_type_map[name] = ftype
coef_str = str(row.get('Coefficient', '')).strip()
if coef_str:
try:
coeffs = [float(c.strip()) for c in coef_str.split(',') if c.strip()]
self._formula_coef_map[name] = coeffs
except Exception:
self._formula_coef_map[name] = []
else:
self._formula_coef_map[name] = []
self.formula_list.clear()
self.index_checkboxes.clear()
self._formula_color_map.clear()
for name, ftype in self._formula_type_map.items():
item = QListWidgetItem(name, self.formula_list)
item.setCheckState(Qt.Checked)
if ftype == 'concentration':
bg_color = QColor(220, 240, 255)
else:
bg_color = self.COLOR_RATIO
self._formula_color_map[name] = bg_color
item.setBackground(QBrush(bg_color))
self.index_checkboxes[name] = item
self.formula_list.adjustSize()
print(f"✅ 加载 {len(self.index_checkboxes)} 个公式")
except Exception as e:
if not silent:
QMessageBox.critical(self, "加载失败", f"原因: {str(e)}")
def _select_ratio_only(self):
for name, item in self.index_checkboxes.items():
ftype = self._formula_type_map.get(name, 'ratio')
item.setCheckState(Qt.Checked if ftype == 'ratio' else Qt.Unchecked)
def _select_conc_only(self):
for name, item in self.index_checkboxes.items():
ftype = self._formula_type_map.get(name, 'ratio')
item.setCheckState(Qt.Checked if ftype == 'concentration' else Qt.Unchecked)
def select_all_formulas(self):
for item in self.index_checkboxes.values():
item.setCheckState(Qt.Checked)
def deselect_all_formulas(self):
for item in self.index_checkboxes.values():
item.setCheckState(Qt.Unchecked)
def get_config(self) -> Dict:
selected = [
name for name, item in self.index_checkboxes.items()
if item.checkState() == Qt.Checked
model_groups = [
("【线性模型】", ['LinearRegression', 'Ridge', 'Lasso', 'ElasticNet', 'PLS']),
("【树模型】", ['DecisionTree', 'RF', 'ExtraTrees', 'XGBoost', 'LightGBM', 'CatBoost']),
("【集成学习】", ['GradientBoosting', 'AdaBoost']),
("【其他模型】", ['SVR', 'KNN', 'MLP'])
]
formula_coefficients = {
name: self._formula_coef_map.get(name, [])
for name in selected
}
return {
'training_csv_path': self.training_data_widget.get_path(),
'formula_csv_file': self.builtin_formula_path,
'formula_names': selected,
'formula_coefficients': formula_coefficients,
'enabled': self.enable_checkbox.isChecked(),
'output_mode': self.mode_group.checkedId(),
}
def set_config(self, config: Dict):
row = 0
for group_name, models in model_groups:
group_label = QLabel(f"<b>{group_name}</b>")
group_label.setStyleSheet(
f"background-color: {ModernStylesheet.COLORS['hover']}; "
f"padding: 5px; border: 1px solid {ModernStylesheet.COLORS['border_light']}; "
f"border-radius: 3px;"
)
model_grid.addWidget(group_label, row, 0, 1, 4)
row += 1
for i, model in enumerate(models):
checkbox = QCheckBox(MODEL_CHINESE.get(model, model))
checkbox.setChecked(False)
self.model_checkboxes[model] = checkbox
model_grid.addWidget(checkbox, row, i % 4)
if (i + 1) % 4 == 0:
row += 1
row += 1
model_button_layout = QHBoxLayout()
model_select_all = QPushButton("全选")
model_deselect_all = QPushButton("全不选")
model_select_all.clicked.connect(lambda: self._toggle_checkboxes(self.model_checkboxes, True))
model_deselect_all.clicked.connect(lambda: self._toggle_checkboxes(self.model_checkboxes, False))
model_button_layout.addWidget(model_select_all)
model_button_layout.addWidget(model_deselect_all)
model_button_layout.addStretch()
model_layout.addLayout(model_grid)
model_layout.addLayout(model_button_layout)
model_group.setLayout(model_layout)
layout.addWidget(model_group)
# 数据划分方法 - 多选
split_group = QGroupBox("数据划分方法 (可多选)")
split_layout = QVBoxLayout()
split_grid = QGridLayout()
self.split_checkboxes = {}
split_methods = ['spxy', 'ks', 'random']
for i, method in enumerate(split_methods):
checkbox = QCheckBox(SPLIT_CHINESE.get(method, method))
checkbox.setChecked(False)
self.split_checkboxes[method] = checkbox
split_grid.addWidget(checkbox, 0, i)
split_button_layout = QHBoxLayout()
split_select_all = QPushButton("全选")
split_deselect_all = QPushButton("全不选")
split_select_all.clicked.connect(lambda: self._toggle_checkboxes(self.split_checkboxes, True))
split_deselect_all.clicked.connect(lambda: self._toggle_checkboxes(self.split_checkboxes, False))
split_button_layout.addWidget(split_select_all)
split_button_layout.addWidget(split_deselect_all)
split_button_layout.addStretch()
split_layout.addLayout(split_grid)
split_layout.addLayout(split_button_layout)
split_group.setLayout(split_layout)
layout.addWidget(split_group)
self.ml_page.setLayout(layout)
def _toggle_checkboxes(self, checkboxes_dict, checked):
"""统一设置checkbox状态"""
for checkbox in checkboxes_dict.values():
checkbox.setChecked(checked)
def _get_default_work_dir(self):
"""获取 work_dir优先用 panel 自身缓存的,否则尝试从主窗口取"""
if hasattr(self, 'work_dir') and self.work_dir:
return str(self.work_dir)
mw = self.window()
if mw and hasattr(mw, 'work_dir') and mw.work_dir:
return str(mw.work_dir)
return ""
def browse_output_path(self):
"""浏览输出文件路径(保存对话框)"""
current = self.output_path.get_path().strip()
if current:
initial_dir = os.path.dirname(current)
initial_file = os.path.basename(current)
else:
initial_dir = ""
initial_file = ""
if not initial_dir or not os.path.isdir(initial_dir):
# 默认定位到 indices 目录
work_dir = self._get_default_work_dir()
initial_dir = os.path.join(work_dir, "6_water_quality_indices") if work_dir else ""
if initial_dir and not os.path.isdir(initial_dir):
os.makedirs(initial_dir, exist_ok=True)
file_path, _ = QFileDialog.getSaveFileName(
self, "保存输出文件", os.path.join(initial_dir, initial_file) if initial_file else initial_dir,
"CSV Files (*.csv);;All Files (*.*)"
)
if file_path:
self.output_path.set_path(file_path)
def get_config(self):
"""获取配置"""
preprocessing_methods = [
method for method, checkbox in self.preproc_checkboxes.items()
if checkbox.isChecked()
]
model_names = [
model for model, checkbox in self.model_checkboxes.items()
if checkbox.isChecked()
]
split_methods = [
method for method, checkbox in self.split_checkboxes.items()
if checkbox.isChecked()
]
config = {
'feature_start_column': self.feature_start.text(),
'preprocessing_methods': preprocessing_methods if preprocessing_methods else ['None'],
'model_names': model_names if model_names else ['SVR'],
'split_methods': split_methods if split_methods else ['random'],
'cv_folds': self.cv_folds.value()
}
training_csv_path = self.training_csv_file.get_path()
if training_csv_path:
config['training_csv_path'] = training_csv_path
output_path = self.output_path.get_path()
if output_path:
config['output_path'] = output_path
return config
def set_config(self, config):
"""设置配置"""
if 'feature_start_column' in config:
self.feature_start.setText(str(config['feature_start_column']))
if 'cv_folds' in config:
self.cv_folds.setValue(config['cv_folds'])
if 'preprocessing_methods' in config:
methods = config['preprocessing_methods']
for method, checkbox in self.preproc_checkboxes.items():
checkbox.setChecked(method in methods)
if 'model_names' in config:
models = config['model_names']
for model, checkbox in self.model_checkboxes.items():
checkbox.setChecked(model in models)
if 'split_methods' in config:
methods = config['split_methods']
for method, checkbox in self.split_checkboxes.items():
checkbox.setChecked(method in methods)
if 'training_csv_path' in config:
self.training_data_widget.set_path(config['training_csv_path'])
if 'formula_names' in config:
sel = set(config['formula_names'])
for name, item in self.index_checkboxes.items():
item.setCheckState(Qt.Checked if name in sel else Qt.Unchecked)
self.enable_checkbox.setChecked(config.get('enabled', True))
if 'output_mode' in config:
btn = self.mode_group.button(config['output_mode'])
if btn:
btn.setChecked(True)
self.training_csv_file.set_path(config['training_csv_path'])
if 'output_path' in config:
self.output_path.set_path(config['output_path'])
def update_from_config(self, work_dir=None, pipeline=None):
"""从全局配置自动填充训练数据和输出路径
Args:
work_dir: 工作目录路径
pipeline: Pipeline 实例(未使用,保留接口兼容性)
"""
if work_dir:
self.work_dir = work_dir
main = self.window()
if hasattr(main, 'step5_panel'):
p5 = main.step5_panel.output_file.get_path()
if p5:
if not os.path.isabs(p5):
p5 = os.path.join(self.work_dir or '', p5)
p5 = p5.replace('\\', '/')
self.training_data_widget.set_path(p5)
elif hasattr(self, 'work_dir') and self.work_dir:
pass
else:
self.work_dir = None
def _get_work_dir(self) -> Optional[str]:
# 1. 尝试从 Step5 界面读取训练数据路径,并确保为绝对路径
main_window = self.window()
if hasattr(main_window, 'step5_panel'):
# 优先直接从 Step5 的输出 widget 读取
step5_output = main_window.step5_panel.output_file.get_path()
if step5_output:
# 若为相对路径,使用 work_dir 合成为绝对路径
if not os.path.isabs(step5_output):
step5_output = os.path.join(self.work_dir or '', step5_output).replace('\\', '/')
self.training_csv_file.set_path(step5_output)
elif hasattr(main_window, 'step5_panel') and hasattr(main_window.step5_panel, 'get_config'):
# 回退:从 Step5 的 config 字典中查找可能的键名
step5_cfg = main_window.step5_panel.get_config()
step5_csv = (
step5_cfg.get('training_csv_path')
or step5_cfg.get('output_file')
or step5_cfg.get('csv_path')
or step5_cfg.get('output_csv')
)
if step5_csv:
# 若为相对路径,使用 work_dir 合成为绝对路径
if not os.path.isabs(step5_csv):
step5_csv = os.path.join(self.work_dir or '', step5_csv).replace('\\', '/')
self.training_csv_file.set_path(step5_csv)
# 2. 自动填充输出文件路径(基于工作目录和输入文件名)
# 输入是 training_spectra.csv → 输出 {work_dir}/6_water_quality_indices/training_spectra_indices.csv
# 输入是 sampling_spectra.csv → 输出 {work_dir}/6_water_quality_indices/sampling_spectra_indices.csv
if self.work_dir:
return self.work_dir
main = self.window()
if hasattr(main, 'work_dir') and main.work_dir:
return main.work_dir
return None
def _get_coord_cols(self, df: pd.DataFrame) -> Tuple[str, str]:
coord_candidates = ['lon', 'lng', 'longitude', '经度', 'x', 'lon_utm', 'utm_x', 'pixel_x']
lat_candidates = ['lat', 'latitude', '纬度', 'y', 'lat_utc', 'utm_y', 'pixel_y']
x_col, y_col = None, None
for col in df.columns:
cl = col.lower()
if x_col is None and any(c in cl for c in coord_candidates):
x_col = col
if y_col is None and any(c in cl for c in lat_candidates):
y_col = col
if x_col is None and len(df.columns) >= 2:
x_col = df.columns[0]
if y_col is None and len(df.columns) >= 2:
y_col = df.columns[1]
return x_col or 'x_coord', y_col or 'y_coord'
indices_dir = os.path.join(self.work_dir, "6_water_quality_indices")
os.makedirs(indices_dir, exist_ok=True)
training_csv = self.training_csv_file.get_path()
if training_csv:
basename = os.path.splitext(os.path.basename(training_csv))[0]
output_file = f"{basename}_indices.csv"
else:
output_file = "water_quality_indices.csv"
output_path = os.path.join(indices_dir, output_file).replace('\\', '/')
self.output_path.set_path(output_path)
else:
self.output_path.set_path("")
def run_step(self):
config = self.get_config()
if not config['enabled']:
QMessageBox.information(self, "提示", "已禁用计算流程(启用计算流程未勾选)")
return
training_path = config['training_csv_path']
if not training_path or not os.path.exists(training_path):
QMessageBox.warning(self, "提示", "请先选择输入特征提取CSV文件")
"""独立运行步骤8"""
training_csv_path = self.training_csv_file.get_path()
if not training_csv_path:
QMessageBox.warning(self, "输入错误", "请选择训练数据CSV文件")
return
main_window = self.window()
if hasattr(main_window, 'run_single_step'):
pipeline_config = {'step8_ml_train': config}
main_window.run_single_step('step8_ml_train', pipeline_config)
config = {'step8_ml_train': self.get_config()}
main_window.run_single_step('step8_ml_train', config)
def get_training_params(self):
"""获取模型训练参数"""
return {
'pipeline_type': 'machine_learning',
'feature_start': float(self.feature_start.text()),
'cv_folds': self.cv_folds.value(),
'preprocess_methods': [method for method, cb in self.preproc_checkboxes.items() if cb.isChecked()],
'model_types': [model for model, cb in self.model_checkboxes.items() if cb.isChecked()],
'split_methods': [method for method, cb in self.split_checkboxes.items() if cb.isChecked()]
}