services/step2-5:打通前四个预处理步骤的真实后端独立服务

新增 src/new/services/{step2,step3,step4,step5}_service.py 四个独立后端服务:
This commit is contained in:
DXC
2026-06-17 09:15:22 +08:00
parent ef3de632d3
commit f8d5ea2eb8
6 changed files with 672 additions and 13 deletions

View File

@ -0,0 +1,136 @@
# -*- coding: utf-8 -*-
"""
Step5 后端计算服务(数据清洗)
====================================
纯计算函数——绝对不引用 PyQt、绝对不引用 main_view、绝对不读写全局变量。它只做三件事:
1. 从 ``config`` 字典读取参数;
2. 调用旧版 ``DataPreparationStep.process_csv`` 读取水质 CSV,
   筛选剔除异常值后写出新的 CSV;
3. 返回结果字典 ``{status, output_path, message, mode}``。
调用入口(由 main_view 在后台 QThread 中调用):
    execute_step5({
        "csv_path": "D:/water_quality.csv",      # 输入水质参数 CSV
        "enabled": True,
        "output_path": "D:/processed_data.csv",  # 处理后输出 CSV
        "work_dir": "D:/workspace",              # 工作目录(由 main_view 注入)
    })
返回字典字段:
* ``status`` : "completed" | "skipped" | "error"
* ``output_path`` : 生成的 .csv 清洗后文件路径(失败时为 None)
* ``message`` : 人类可读说明
* ``mode`` : "csv_clean"(便于 UI 提示)
"""
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict
from src.core.steps.data_preparation_step import DataPreparationStep
def _resolve_output_dir(output_path: str | None, work_dir: str) -> Path:
"""根据 output_path / work_dir 计算清洗后 CSV 输出目录"""
if output_path:
return Path(output_path).parent
return Path(work_dir) / "5_Data_Cleaning"
def _step5_result(
    status: str,
    message: str,
    output_path: str | None = None,
) -> Dict[str, Any]:
    """Build the standard Step 5 result dictionary (mode is always csv_clean)."""
    return {
        "status": status,
        "output_path": output_path,
        "message": message,
        "mode": "csv_clean",
    }


def _step5_deliver(produced: Path, requested: Path) -> Path:
    """Move the produced CSV to the path the caller asked for, if it differs.

    Returns the path where the file finally lives. Raises ``OSError`` when
    the move cannot be performed; the produced file is left untouched then.
    """
    try:
        if produced.samefile(requested):
            return produced
    except OSError:
        # ``requested`` does not exist yet (or cannot be stat'ed), so it is
        # certainly not the produced file; fall through and move.
        pass
    requested.parent.mkdir(parents=True, exist_ok=True)
    produced.replace(requested)
    return requested


def execute_step5(config: Dict[str, Any]) -> Dict[str, Any]:
    """Run Step 5 (data cleaning) and report the outcome as a plain dict.

    The function reads its parameters from ``config``, delegates the actual
    cleaning to ``DataPreparationStep.process_csv`` and converts every
    outcome, including exceptions raised by that call, into a result dict.

    Args:
        config: Mapping with the keys
            ``csv_path`` (input CSV; required),
            ``enabled`` (defaults to ``True``; falsy skips the step),
            ``output_path`` (optional target file for the cleaned CSV),
            ``work_dir`` (optional; defaults to ``"."``).

    Returns:
        ``{status, output_path, message, mode}`` where ``status`` is
        ``"completed"``, ``"skipped"`` or ``"error"``, ``output_path`` is the
        cleaned CSV path with forward slashes (``None`` unless completed) and
        ``mode`` is always ``"csv_clean"``.
    """
    # ---------- Normalise inputs ----------
    csv_path = config.get("csv_path")
    enabled = bool(config.get("enabled", True))
    output_path = config.get("output_path")
    work_dir = config.get("work_dir") or "."

    # ---------- Fail fast before touching the filesystem layout ----------
    if not enabled:
        return _step5_result("skipped", "用户禁用此步骤enabled=False")
    if not csv_path:
        return _step5_result("error", "未提供水质参数 CSV 路径csv_path")
    if not Path(csv_path).exists():
        return _step5_result("error", f"水质参数 CSV 不存在: {csv_path}")

    output_dir = _resolve_output_dir(output_path, work_dir)

    # ---------- Execute; exceptions become error dicts so the worker
    # thread that calls this function never sees them ----------
    try:
        result_path = DataPreparationStep.process_csv(
            csv_path=csv_path,
            output_dir=output_dir,
            callback=None,  # logging is handled by the caller
        )
    except FileNotFoundError as e:
        return _step5_result("error", f"文件不存在: {e}")
    except ValueError as e:
        return _step5_result("error", f"参数错误: {e}")
    except Exception as e:  # noqa: BLE001 -- service-layer catch-all
        return _step5_result("error", f"{type(e).__name__}: {e}")

    # ---------- Verify that a file was really produced ----------
    # Guard against a falsy return value first: Path(None) would raise
    # TypeError outside the try block above.
    if not result_path or not Path(result_path).exists():
        return _step5_result(
            "error",
            f"DataPreparationStep.process_csv 未生成文件: {result_path}",
        )
    p = Path(result_path)

    # ---------- Honour the requested output file name ----------
    # process_csv only receives a directory and (per the original in-code
    # note) picks its own file name inside it, so a caller-supplied
    # ``output_path`` with a different name would otherwise be ignored.
    if output_path:
        try:
            p = _step5_deliver(p, Path(output_path))
        except OSError as e:
            # Best effort: the cleaned file exists, just not where requested.
            # Report success with the real location rather than failing.
            return _step5_result(
                "completed",
                f"清洗后 CSV 已保存: {p.name}(未能移动到 {output_path}: {e})",
                str(p).replace("\\", "/"),
            )

    return _step5_result(
        "completed",
        f"清洗后 CSV 已保存: {p.name}",
        str(p).replace("\\", "/"),
    )