#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Data-cleaning panel (``Step5CleanPanel``).

NOTE(review): the original header labelled this module "Step4 - data
preprocessing", while the class name, its docstring and the step key all
refer to step 5 / data cleaning -- confirm which numbering is intended.
"""

import os
import sys
from pathlib import Path

# Path-normalisation helper (counterpart of pipeline.get_step_output_dir).
# The module's own directory must be on sys.path *before* the helper import
# below, so the order of these statements matters.
_HERE = os.path.dirname(os.path.abspath(__file__))
if _HERE not in sys.path:
    sys.path.insert(0, _HERE)
from _step_path_resolver import resolve_subdir

import pandas as pd
from PyQt5.QtWidgets import (
    QWidget, QVBoxLayout, QGroupBox, QHBoxLayout, QLabel, QSpinBox,
    QPushButton, QCheckBox, QTableView, QAbstractItemView, QHeaderView,
    QMessageBox,
)
from PyQt5.QtCore import Qt

from src.gui.components.custom_widgets import FileSelectWidget
from src.gui.styles import ModernStylesheet


class Step5CleanPanel(QWidget):
    """Step 5: data cleaning.

    Lets the user pick an input CSV, preview its first rows, choose an
    output CSV path and trigger a stand-alone run of this step.
    """

    # Key under which this step's configuration is published / dispatched.
    STEP_NAME = 'step5_clean'

    # Class-level default so the attribute always exists, even before
    # update_from_config() has been called for the first time.
    work_dir = None

    def __init__(self, parent=None):
        super().__init__(parent)
        self.init_ui()

    def init_ui(self):
        """Build the widget tree and start with an empty preview table."""
        layout = QVBoxLayout()

        # Input CSV file
        self.csv_file = FileSelectWidget(
            "水质参数文件:", "CSV Files (*.csv);;All Files (*.*)"
        )
        layout.addWidget(self.csv_file)

        hint = QLabel("提示: 处理CSV文件,筛选剔除异常值")
        hint.setStyleSheet("color: #666; font-size: 10px;")
        layout.addWidget(hint)

        # CSV preview group: row-count spinner + refresh button + table
        preview_group = QGroupBox("CSV数据预览")
        preview_layout = QVBoxLayout()

        controls_layout = QHBoxLayout()
        controls_layout.addWidget(QLabel("预览行数:"))
        self.preview_rows_spin = QSpinBox()
        self.preview_rows_spin.setRange(1, 200)
        self.preview_rows_spin.setValue(10)
        controls_layout.addWidget(self.preview_rows_spin)
        self.preview_btn = QPushButton("刷新预览")
        self.preview_btn.clicked.connect(self.load_csv_preview)
        controls_layout.addWidget(self.preview_btn)
        controls_layout.addStretch()

        # Read-only table, whole-row single selection, columns stretched
        self.preview_table = QTableView()
        self.preview_table.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.preview_table.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.preview_table.setSelectionMode(QAbstractItemView.SingleSelection)
        self.preview_table.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.preview_table.verticalHeader().setVisible(False)
        self.preview_table.setMinimumHeight(200)

        self.preview_status_label = QLabel("请选择CSV文件并点击刷新预览")
        self.preview_status_label.setStyleSheet("color: #666; font-size: 11px;")

        preview_layout.addLayout(controls_layout)
        preview_layout.addWidget(self.preview_table)
        preview_layout.addWidget(self.preview_status_label)
        preview_group.setLayout(preview_layout)
        layout.addWidget(preview_group)

        # Output file path
        self.output_file = FileSelectWidget(
            "输出处理后CSV:", "CSV Files (*.csv);;All Files (*.*)"
        )
        self.output_file.line_edit.setPlaceholderText("processed_data.csv")
        layout.addWidget(self.output_file)

        # Enable/disable this step
        self.enable_checkbox = QCheckBox("启用此步骤")
        self.enable_checkbox.setChecked(True)
        layout.addWidget(self.enable_checkbox)

        # Stand-alone run button
        self.run_btn = QPushButton("独立运行此步骤")
        self.run_btn.setStyleSheet(ModernStylesheet.get_button_stylesheet('success'))
        self.run_btn.clicked.connect(self._on_run_single_clicked)
        layout.addWidget(self.run_btn)

        layout.addStretch()
        self.setLayout(layout)
        self.reset_preview()

    def get_config(self):
        """Return this step's configuration as a dict.

        Always contains ``'csv_path'``; ``'output_path'`` is included only
        when the output field is non-empty.
        """
        config = {
            'csv_path': self.csv_file.get_path(),
        }
        output_path = self.output_file.get_path()
        if output_path:
            config['output_path'] = output_path
        return config

    def set_config(self, config):
        """Apply a configuration dict to the widgets.

        Setting ``'csv_path'`` also refreshes the preview table.
        """
        if 'csv_path' in config:
            self.csv_file.set_path(config['csv_path'])
            self.load_csv_preview()
        if 'output_path' in config:
            self.output_file.set_path(config['output_path'])

    def update_from_config(self, work_dir=None, pipeline=None):
        """Auto-fill the output path from the global configuration.

        Args:
            work_dir: Working directory. When falsy, the previously
                remembered working directory (if any) is reused.
            pipeline: Pipeline instance (unused; kept for interface
                compatibility).
        """
        if work_dir:
            self.work_dir = work_dir
        elif not self.work_dir:
            # Normalise any falsy remembered value (e.g. "") to None.
            self.work_dir = None

        if not self.work_dir:
            self.output_file.set_path("")
            return

        output_dir = resolve_subdir(self.work_dir, 'data_cleaning')
        os.makedirs(output_dir, exist_ok=True)
        # Forward slashes keep the displayed path uniform across platforms.
        default_output_path = os.path.join(
            output_dir, "processed_data.csv"
        ).replace('\\', '/')
        self.output_file.set_path(default_output_path)

    def _build_run_config(self):
        """Return ``{STEP_NAME: config}``, or None if no input CSV is chosen.

        Shows a warning dialog in the latter case.
        """
        if not self.csv_file.get_path():
            QMessageBox.warning(self, "输入错误", "请选择水质参数文件!")
            return None
        return {self.STEP_NAME: self.get_config()}

    def _on_run_single_clicked(self):
        """Publish a single-step run request on the event bus.

        This decouples the panel from the pipeline executor.
        """
        # Imported lazily, as in the original code.
        from src.gui.core.event_bus import global_event_bus

        config = self._build_run_config()
        if config is None:
            return
        global_event_bus.publish('RequestRunSingleStep', {
            'step_name': self.STEP_NAME,
            'config': config,
        })

    def run_step(self):
        """Run step 5 on its own via the top-level window (legacy path).

        Kept for compatibility; does nothing if the top-level window has no
        ``run_single_step`` method.
        """
        config = self._build_run_config()
        if config is None:
            return
        main_window = self.window()
        if hasattr(main_window, 'run_single_step'):
            main_window.run_single_step(self.STEP_NAME, config)

    def reset_preview(self, message="请选择CSV文件并点击刷新预览"):
        """Clear the preview table and show *message* in the status label."""
        # Lazy import -- presumably avoids a circular import with the main
        # GUI module; verify before moving it to module level.
        from src.gui.water_quality_gui import PandasTableModel

        empty_model = PandasTableModel(pd.DataFrame())
        self.preview_table.setModel(empty_model)
        self.preview_status_label.setText(message)

    def load_csv_preview(self):
        """Load the first N rows of the selected CSV into the preview table.

        N comes from the row-count spin box. Any failure is reported through
        the status label instead of being raised.
        """
        from src.gui.water_quality_gui import PandasTableModel

        csv_path = self.csv_file.get_path()
        if not csv_path:
            self.reset_preview("请先选择CSV文件")
            return
        if not os.path.exists(csv_path):
            self.reset_preview("文件不存在,请检查路径")
            return

        try:
            rows_to_preview = max(1, self.preview_rows_spin.value())
            # dtype=object skips dtype inference, so empty cells or mixed
            # types cannot trigger dtype errors.
            df = pd.read_csv(csv_path, nrows=rows_to_preview, dtype=object)
            # Per the original comment, PandasTableModel.__init__ also fills
            # NaN; repeated here defensively.
            df = df.fillna('')
            if df.empty:
                self.reset_preview("CSV文件为空")
                return
            model = PandasTableModel(df)
            self.preview_table.setModel(model)
            self.preview_status_label.setText(
                f"预览 {len(df)} 行,{len(df.columns)} 列(总行数可能更多)"
            )
        except Exception as exc:
            # UI boundary: surface any read/parse failure to the user.
            self.reset_preview(f"加载失败: {exc}")