diff --git a/src/core/steps/modeling_step.py b/src/core/steps/modeling_step.py
index 405e4bc..b58d20f 100644
--- a/src/core/steps/modeling_step.py
+++ b/src/core/steps/modeling_step.py
@@ -14,6 +14,99 @@ import pandas as pd
 import numpy as np
 
 
+# ============================================================
+# Reverse lookup tables: localized UI checkbox text -> internal algorithm key.
+# Keep in sync with the display tables in src/gui/panels/step6_panel.py.
+# ============================================================
+
+# Model names: Chinese label (abbreviation) -> internal key
+MODEL_NAME_MAP = {
+    "多元线性回归 (MLR)": "LinearRegression",
+    "岭回归 (Ridge)": "Ridge",
+    "套索回归 (Lasso)": "Lasso",
+    "弹性网络 (ElasticNet)": "ElasticNet",
+    "偏最小二乘 (PLSR)": "PLS",
+    "决策树 (CART)": "DecisionTree",
+    "随机森林 (RF)": "RF",
+    "极端随机树 (ET)": "ExtraTrees",
+    "极值梯度提升 (XGBoost)": "XGBoost",
+    "轻量梯度提升 (LightGBM)": "LightGBM",
+    "类别梯度提升 (CatBoost)": "CatBoost",
+    "梯度提升树 (GBDT)": "GradientBoosting",
+    "自适应提升 (AdaBoost)": "AdaBoost",
+    "支持向量回归 (SVR)": "SVR",
+    "K近邻回归 (KNN)": "KNN",
+    "多层感知机 (BP神经网络)": "MLP",
+}
+
+# Preprocessing methods: accepted label variants -> canonical key
+PREPROC_NAME_MAP = {
+    # No preprocessing
+    "无 (None)": "None",
+    "None": "None",
+    # MMS
+    "最小-最大归一化 (MMS)": "MMS",
+    "MMS": "MMS",
+    # SS
+    "标度化 (SS)": "SS",
+    "SS": "SS",
+    # SNV
+    "标准正态变换 (SNV)": "SNV",
+    "SNV": "SNV",
+    # MA
+    "移动平均 (MA)": "MA",
+    "MA": "MA",
+    # SG
+    "Savitzky-Golay (SG)": "SG",
+    "SG": "SG",
+    # MSC
+    "多元散射校正 (MSC)": "MSC",
+    "MSC": "MSC",
+    # D1
+    "一阶导数 (D1)": "D1",
+    "D1": "D1",
+    # D2
+    "二阶导数 (D2)": "D2",
+    "D2": "D2",
+    # DT
+    "去趋势 (DT)": "DT",
+    "DT": "DT",
+    # CT
+    "中心化 (CT)": "CT",
+    "CT": "CT",
+}
+
+# Data split methods: accepted label variants -> canonical key
+SPLIT_NAME_MAP = {
+    "SPXY 算法 (考量X-Y空间)": "spxy",
+    "spxy": "spxy",
+    "KS 算法 (考量X空间)": "ks",
+    "ks": "ks",
+    "随机划分 (Random)": "random",
+    "random": "random",
+}
+
+
+def _normalize_names(names: List[str], mapping: dict) -> List[str]:
+    """Translate names via ``mapping``, keeping unmapped names unchanged."""
+    return [mapping.get(name, name) for name in names]
+
+
+def _normalize_model_names(model_names: List[str]) -> List[str]:
+    """Convert localized model labels to internal keys (MODEL_NAME_MAP)."""
+    return _normalize_names(model_names, MODEL_NAME_MAP)
+
+
+def _normalize_preprocessing_methods(methods: List[str]) -> List[str]:
+    """Convert localized preprocessing labels to keys (PREPROC_NAME_MAP)."""
+    return _normalize_names(methods, PREPROC_NAME_MAP)
+
+
+def _normalize_split_methods(methods: List[str]) -> List[str]:
+    """Convert localized split-method labels to keys (SPLIT_NAME_MAP)."""
+    return _normalize_names(methods, SPLIT_NAME_MAP)
+
+
 class ModelingStep:
     """建模步骤"""
 
@@ -75,6 +168,15 @@ class ModelingStep:
         if split_methods is None:
             split_methods = ["spxy", "ks", "random"]
 
+        # ---- Label cleanup: map Chinese/mixed names from the UI to internal keys ----
+        preprocessing_methods = _normalize_preprocessing_methods(preprocessing_methods)
+        model_names = _normalize_model_names(model_names)
+        split_methods = _normalize_split_methods(split_methods)
+
+        print(f"[参数清洗] 预处理方法: {preprocessing_methods}")
+        print(f"[参数清洗] 模型名称: {model_names}")
+        print(f"[参数清洗] 划分方法: {split_methods}")
+
         modeler = WaterQualityModelingBatch(str(output_dir))
         modeler.train_models_batch(
             csv_path=training_csv_path,
diff --git a/src/gui/panels/step6_panel.py b/src/gui/panels/step6_panel.py
index 81e1e6b..729697e 100644
--- a/src/gui/panels/step6_panel.py
+++ b/src/gui/panels/step6_panel.py
@@ -17,6 +17,57 @@ from src.gui.components.custom_widgets import FileSelectWidget
 from src.gui.styles import ModernStylesheet
 
 
+# ============================================================
+# Chinese display tables (internal key -> display text)
+# ============================================================
+
+# Preprocessing methods: internal key -> display text
+PREPROC_CHINESE = {
+    'None': '无 (None)',
+    'MMS': '最小-最大归一化 (MMS)',
+    'SS': '标度化 (SS)',
+    'SNV': '标准正态变换 (SNV)',
+    'MA': '移动平均 (MA)',
+    'SG': 'Savitzky-Golay (SG)',
+    'MSC': '多元散射校正 (MSC)',
+    'D1': '一阶导数 (D1)',
+    'D2': '二阶导数 (D2)',
+    'DT': '去趋势 (DT)',
+    'CT': '中心化 (CT)',
+}
+
+# Model types: internal key -> display text
+MODEL_CHINESE = {
+    # Linear models
+    'LinearRegression': '多元线性回归 (MLR)',
+    'Ridge': '岭回归 (Ridge)',
+    'Lasso': '套索回归 (Lasso)',
+    'ElasticNet': '弹性网络 (ElasticNet)',
+    'PLS': '偏最小二乘 (PLSR)',
+    # Tree models
+    'DecisionTree': '决策树 (CART)',
+    'RF': '随机森林 (RF)',
+    'ExtraTrees': '极端随机树 (ET)',
+    'XGBoost': '极值梯度提升 (XGBoost)',
+    'LightGBM': '轻量梯度提升 (LightGBM)',
+    'CatBoost': '类别梯度提升 (CatBoost)',
+    # Ensemble learning
+    'GradientBoosting': '梯度提升树 (GBDT)',
+    'AdaBoost': '自适应提升 (AdaBoost)',
+    # Other models
+    'SVR': '支持向量回归 (SVR)',
+    'KNN': 'K近邻回归 (KNN)',
+    'MLP': '多层感知机 (BP神经网络)',
+}
+
+# Data split methods: internal key -> display text
+SPLIT_CHINESE = {
+    'spxy': 'SPXY 算法 (考量X-Y空间)',
+    'ks': 'KS 算法 (考量X空间)',
+    'random': '随机划分 (Random)',
+}
+
+
 class Step6Panel(QWidget):
     """步骤6:机器学习建模"""
     def __init__(self, parent=None):
@@ -54,7 +105,7 @@ class Step6Panel(QWidget):
 
         # 启用步骤
         self.enable_checkbox = QCheckBox("启用此步骤")
-        self.enable_checkbox.setChecked(True)
+        self.enable_checkbox.setChecked(False)
         layout.addWidget(self.enable_checkbox)
 
         # 独立运行按钮
@@ -95,8 +146,8 @@ class Step6Panel(QWidget):
         preproc_methods = ['None', 'MMS', 'SS', 'SNV', 'MA', 'SG', 'MSC', 'D1', 'D2', 'DT', 'CT']
 
         for i, method in enumerate(preproc_methods):
-            checkbox = QCheckBox(method)
-            checkbox.setChecked(True)
+            checkbox = QCheckBox(PREPROC_CHINESE.get(method, method))
+            checkbox.setChecked(False)
             self.preproc_checkboxes[method] = checkbox
             preproc_grid.addWidget(checkbox, i // 4, i % 4)
 
@@ -122,10 +173,10 @@ class Step6Panel(QWidget):
 
         self.model_checkboxes = {}
         model_groups = [
-            ("线性模型", ['LinearRegression', 'Ridge', 'Lasso', 'ElasticNet', 'PLS']),
-            ("树模型", ['DecisionTree', 'RF', 'ExtraTrees', 'XGBoost', 'LightGBM', 'CatBoost']),
-            ("集成学习", ['GradientBoosting', 'AdaBoost']),
-            ("其他模型", ['SVR', 'KNN', 'MLP'])
+            ("【线性模型】", ['LinearRegression', 'Ridge', 'Lasso', 'ElasticNet', 'PLS']),
+            ("【树模型】", ['DecisionTree', 'RF', 'ExtraTrees', 'XGBoost', 'LightGBM', 'CatBoost']),
+            ("【集成学习】", ['GradientBoosting', 'AdaBoost']),
+            ("【其他模型】", ['SVR', 'KNN', 'MLP'])
         ]
 
         row = 0
@@ -140,8 +191,8 @@ class Step6Panel(QWidget):
             row += 1
 
             for i, model in enumerate(models):
-                checkbox = QCheckBox(model)
-                checkbox.setChecked(model in ['SVR', 'RF', 'Ridge', 'Lasso'])
+                checkbox = QCheckBox(MODEL_CHINESE.get(model, model))
+                checkbox.setChecked(False)
                 self.model_checkboxes[model] = checkbox
                 model_grid.addWidget(checkbox, row, i % 4)
                 if (i + 1) % 4 == 0:
@@ -172,8 +223,8 @@ class Step6Panel(QWidget):
         split_methods = ['spxy', 'ks', 'random']
 
         for i, method in enumerate(split_methods):
-            checkbox = QCheckBox(method)
-            checkbox.setChecked(True)
+            checkbox = QCheckBox(SPLIT_CHINESE.get(method, method))
+            checkbox.setChecked(False)
             self.split_checkboxes[method] = checkbox
             split_grid.addWidget(checkbox, 0, i)
 