refactor(water_index): 用 waterindex.csv 驱动公式计算,移除 45 个硬编码方法

This commit is contained in:
DXC
2026-06-09 11:24:15 +08:00
parent 371e7a2745
commit 624a5bdcd4
3 changed files with 314 additions and 918 deletions

View File

@ -134,81 +134,103 @@ class BandMathCalculator:
print(f"{i} 行计算失败使用NaN填充")
return results
def _parse_coeff(self, coeff_str: str) -> np.ndarray:
    """Parse a ``Coefficient`` cell into a NumPy array of coefficients.

    The text is split on commas and every piece is converted with
    ``float``. The order of the pieces is kept, so the result can be
    handed straight to ``np.polyval`` (highest power first).

    Examples:
        "1.0"      -> [1.0]
        "a,b,c"    -> [a, b, c]    (polynomial, highest degree first)
        "1.0,2.0"  -> [1.0, 2.0]   (linear, y = a*x + b)

    Args:
        coeff_str: Raw cell value. It is converted with ``str`` before
            parsing, so a numeric cell such as ``2.5`` is accepted too.

    Returns:
        The parsed coefficients. A missing value (anything ``pd.isna``
        reports as NA), an empty string, or the literal text "nan" /
        "None" yields the neutral default ``[1.0]``.

    Raises:
        ValueError: If a comma-separated piece is not a valid float.
    """
    # Missing cells fall back to the single default coefficient.
    if pd.isna(coeff_str):
        return np.array([1.0])

    text = str(coeff_str).strip()
    blank_markers = ("", "nan", "None")
    if text in blank_markers:
        return np.array([1.0])

    pieces = text.split(",")
    return np.array([float(piece.strip()) for piece in pieces])
def process_formulas_from_csv(self, formula_csv_file, formula_names=None, output_file=None):
"""
从公式CSV文件中批量计算并添加到数据文件中
从公式CSV文件中批量计算并添加到数据文件中
支持两种 CSV 格式:
- 新版6列Formula_Name, Category, Formula_Type, Formula, Coefficient, Reference
- 旧版≥3列第一列=公式名,第三列=表达式(兼容处理)
Formula_Type 为 'concentration' 时,计算比值后额外通过 np.polyval 应用 Coefficient。
参数:
formula_csv_file: 公式CSV文件路径,第一列为公式名称,第三列为具体公式
formula_csv_file: 公式CSV文件路径
formula_names: 要计算的公式名称列表如果为None则计算所有公式
output_file: 输出文件路径如果为None则自动生成
返回:
包含计算结果的新DataFrame
"""
# 读取公式CSV文件
try:
formulas_df = pd.read_csv(formula_csv_file)
print(f"读取到 {len(formulas_df)} 个公式")
# 检查CSV格式假设第一列为公式名称第三列为具体公式
if len(formulas_df.columns) < 3:
raise ValueError("公式CSV文件需要至少3列")
formula_name_col = formulas_df.columns[0] # 第一列:公式名称
formula_expr_col = formulas_df.columns[2] # 第三列:具体公式
# 创建结果DataFrame的副本
has_new_format = set(["Formula_Name", "Formula_Type", "Formula", "Coefficient"]).issubset(
set(formulas_df.columns)
)
if has_new_format:
name_col = "Formula_Name"
type_col = "Formula_Type"
expr_col = "Formula"
coeff_col = "Coefficient"
else:
name_col = formulas_df.columns[0]
type_col = None
expr_col = formulas_df.columns[2]
coeff_col = None
result_df = self.df.copy()
# 如果指定了公式名称,则只计算这些公式
if formula_names is not None:
if isinstance(formula_names, str):
formula_names = [formula_names] # 转换为列表
# 筛选出指定公式
selected_formulas = formulas_df[formulas_df[formula_name_col].isin(formula_names)]
print(f"找到 {len(selected_formulas)} 个指定的公式")
if len(selected_formulas) == 0:
print(f"警告: 未找到指定的公式: {formula_names}")
formula_names = [formula_names]
selected = formulas_df[formulas_df[name_col].isin(formula_names)]
print(f"找到 {len(selected)}指定公式")
if len(selected) == 0:
print(f"警告: 未找到指定公式: {formula_names}")
return result_df
formulas_to_process = selected_formulas
formulas_to_process = selected
else:
# 计算所有公式
formulas_to_process = formulas_df
# 为每个公式计算所有行
for _, row in formulas_to_process.iterrows():
formula_name = row[formula_name_col]
formula_expr = row[formula_expr_col]
formula_name = row[name_col]
formula_expr = row[expr_col]
if pd.isna(formula_name) or pd.isna(formula_expr):
print(f"跳过空公式: {row}")
print(f"跳过空公式: {row.to_dict()}")
continue
print(f"\n计算公式: {formula_name} = {formula_expr}")
# 计算所有行的结果
ftype = str(row[type_col]).strip().lower() if type_col and not pd.isna(row.get(type_col)) else "ratio"
coeff_str = str(row[coeff_col]).strip() if coeff_col and not pd.isna(row.get(coeff_col)) else "1.0"
print(f"\n计算公式: {formula_name} = {formula_expr} [type={ftype}, coeff={coeff_str}]")
results = self.calculate_all_rows(formula_expr)
# 将结果添加到DataFrame
if ftype == "concentration":
coeff = self._parse_coeff(coeff_str)
results = np.polyval(coeff, np.array(results))
result_df[formula_name] = results
print(f"公式 '{formula_name}' 计算完成,添加到数据中")
# 保存结果
print(f"公式 '{formula_name}' 计算完成")
if output_file is None:
# 自动生成输出文件名
import os
base_name = os.path.splitext(os.path.basename(formula_csv_file))[0]
output_file = f"band_math_results_{base_name}.csv"
result_df.to_csv(output_file, index=False)
print(f"结果已保存到: {output_file}")
return result_df
except Exception as e:
print(f"处理公式CSV文件时出错: {e}")
import traceback

File diff suppressed because it is too large Load Diff