"""Spectral preprocessing helpers.

Every transform takes a 2-D collection of spectra with one spectrum per row
and one measurement point per column.  ``Preprocessing`` dispatches to the
individual transforms by a short method name.
"""
from copy import deepcopy

import joblib  # used to save and load fitted scaler objects
import numpy as np
import pandas as pd
import pywt
from scipy import signal
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Location where ``Preprocessing`` stores the fitted StandardScaler when the
# 'SS' method is used and the caller does not pass ``scaler_save_path``.
DEFAULT_SCALER_SAVE_PATH = r'E:\code\plastic\plastic20260224\plastic\plastic\output\20260224\modelsave\scaler_params.pkl'


def _as_float_array(input_spectrum):
    """Return the input as an ndarray, promoting non-float dtypes to float64.

    Floating-point inputs keep their dtype; integer or boolean inputs are
    converted so that fractional results are not truncated.
    """
    spectra = np.asarray(input_spectrum)
    if not np.issubdtype(spectra.dtype, np.floating):
        spectra = spectra.astype(float)
    return spectra


# Min-max normalization
def MMS(input_spectrum):
    """Scale the data with a freshly fitted ``MinMaxScaler``.

    The scaler is fitted on ``input_spectrum`` itself, so scaling is applied
    per column (per measurement point) across the rows that are passed in.
    """
    output_spectrum = MinMaxScaler().fit_transform(input_spectrum)
    return output_spectrum


# Standardization
def SS(input_spectrum, save_path=None):
    """Standardize the data with a freshly fitted ``StandardScaler``.

    Args:
        input_spectrum: 2-D array-like, one spectrum per row.
        save_path: optional file path.  When truthy, the fitted scaler is
            written there with ``joblib.dump`` and a message is printed.

    Returns:
        The standardized data as returned by ``fit_transform``.
    """
    scaler = StandardScaler()
    output_spectrum = scaler.fit_transform(input_spectrum)

    # Persist the fitted scaler only when a destination was supplied.
    if save_path:
        joblib.dump(scaler, save_path)
        print(f"Scaler parameters saved to {save_path}")

    return output_spectrum


# Mean centering
def CT(input_spectrum):
    """Subtract each row's own mean from that row.

    The input is not modified.  Non-float input is promoted to float64 so the
    centred values are not truncated back to integers.
    """
    spectra = _as_float_array(input_spectrum)
    return spectra - spectra.mean(axis=1, keepdims=True)


# Standard normal variate
def SNV(input_spectrum):
    """Apply the standard normal variate transform row by row.

    Each row has its mean subtracted and is divided by its standard
    deviation (pandas default, i.e. sample standard deviation).  A standard
    deviation of exactly 0 is replaced by 1 to avoid division by zero.

    Args:
        input_spectrum: ``pandas.DataFrame`` with one spectrum per row.

    Returns:
        A ``pandas.DataFrame`` of the same shape.

    Raises:
        ValueError: if ``input_spectrum`` is not a ``pandas.DataFrame``.
    """
    if not isinstance(input_spectrum, pd.DataFrame):
        raise ValueError("Input spectrum must be a Pandas DataFrame")

    data_average = input_spectrum.mean(axis=1)
    data_std = input_spectrum.std(axis=1)
    data_std = data_std.replace(0, 1)

    output_spectrum = (input_spectrum.sub(data_average, axis=0)).div(data_std, axis=0)
    return output_spectrum


# Moving-average smoothing
def MA(input_spectrum, WSZ=11):
    """Smooth every row with a centred moving average of width ``WSZ``.

    Interior points use the full window.  Near both ends the window shrinks
    symmetrically (widths 1, 3, 5, ...) so the output keeps the input length.

    Args:
        input_spectrum: 2-D array-like, one spectrum per row.
        WSZ: window size; must be a positive odd integer no larger than the
            number of points per row.

    Returns:
        A new float ndarray with the same shape as the input.

    Raises:
        ValueError: if ``WSZ`` is not a positive odd number or exceeds the
            row length.
    """
    spectra = _as_float_array(input_spectrum)
    if spectra.shape[0] == 0:
        return spectra.copy()

    n_points = spectra.shape[1]
    if WSZ < 1 or WSZ % 2 == 0:
        raise ValueError(f"WSZ must be a positive odd integer, got {WSZ}")
    if WSZ > n_points:
        raise ValueError(
            f"WSZ ({WSZ}) must not exceed the number of points per spectrum ({n_points})"
        )

    kernel = np.ones(WSZ, dtype=int)
    # Widths of the shrinking windows used at the two edges.
    edge_widths = np.arange(1, WSZ - 1, 2)

    smoothed = np.empty_like(spectra)
    for i, row in enumerate(spectra):
        centre = np.convolve(row, kernel, 'valid') / WSZ
        head = np.cumsum(row[:WSZ - 1])[::2] / edge_widths
        tail = (np.cumsum(row[:-WSZ:-1])[::2] / edge_widths)[::-1]
        smoothed[i] = np.concatenate((head, centre, tail))
    return smoothed


# Savitzky-Golay smoothing filter
def SG(input_spectrum, w=15, p=2):
    """Apply ``scipy.signal.savgol_filter`` along the last axis.

    Args:
        input_spectrum: array-like of spectra.
        w: window length passed to ``savgol_filter``.
        p: polynomial order passed to ``savgol_filter``.
    """
    output_spectrum = signal.savgol_filter(input_spectrum, w, p)
    return output_spectrum


# First derivative
def D1(input_spectrum):
    """Return the first-order difference of every row.

    The result is a float array with one column fewer than the input.
    """
    return np.diff(np.asarray(input_spectrum, dtype=float), axis=1)


# Second derivative
def D2(input_spectrum):
    """Return the second-order difference of every row.

    The result is a float array with two columns fewer than the input.
    """
    return np.diff(np.asarray(input_spectrum, dtype=float), n=2, axis=1)


# Detrending
def DT(input_spectrum):
    """Remove a straight-line trend from every row.

    For each row a least-squares line is fitted against the column index
    (0, 1, 2, ...) and subtracted.  All rows are fitted in one multi-target
    regression.  The input is not modified; non-float input is promoted to
    float64 so the residuals are not truncated.
    """
    spectra = _as_float_array(input_spectrum)
    if spectra.shape[0] == 0:
        return spectra.copy()

    positions = np.arange(spectra.shape[1], dtype=float)
    model = LinearRegression()
    # One regression target per spectrum: y has shape (n_points, n_spectra).
    model.fit(positions.reshape(-1, 1), spectra.T)
    slopes = np.reshape(model.coef_, (-1, 1))
    intercepts = np.reshape(model.intercept_, (-1, 1))

    detrended = spectra - (slopes * positions + intercepts)
    return detrended.astype(spectra.dtype, copy=False)


# Multiplicative scatter correction
def MSC(input_spectrum, reference=None):
    """Apply multiplicative scatter correction to every row.

    Each row ``y`` is regressed on a reference spectrum
    (``y ~ k * reference + b``) and replaced by ``(y - b) / k``.

    Args:
        input_spectrum: 2-D array-like, one spectrum per row.
        reference: optional 1-D reference spectrum.  When omitted, the
            column-wise mean of ``input_spectrum`` is used.

    Returns:
        A float64 ndarray with the same shape as the input.  Rows whose
        fitted slope is exactly 0 are only offset-corrected (divided by 1)
        instead of producing NaN/inf.
    """
    spectra = np.asarray(input_spectrum, dtype=float)
    if spectra.shape[0] == 0:
        return spectra.copy()

    if reference is None:
        reference_spectrum = spectra.mean(axis=0)
    else:
        reference_spectrum = np.asarray(reference, dtype=float).ravel()

    model = LinearRegression()
    # One regression target per spectrum: y has shape (n_points, n_spectra).
    model.fit(reference_spectrum.reshape(-1, 1), spectra.T)
    slopes = np.reshape(model.coef_, (-1, 1))
    intercepts = np.reshape(model.intercept_, (-1, 1))

    # Guard against division by zero for rows with no slope.
    safe_slopes = np.where(slopes == 0, 1.0, slopes)
    return (spectra - intercepts) / safe_slopes


# Wavelet transform
def wave(input_spectrum):
    """Denoise every row by thresholding its 'db8' wavelet detail coefficients.

    Each row is decomposed to the maximum level allowed for its length, every
    detail level is passed through ``pywt.threshold`` with a threshold of
    0.04 times that level's maximum coefficient, and the row is rebuilt.

    Returns:
        A 2-D ndarray with one reconstructed row per input row (also for a
        single-row or empty input).
    """
    spectra = np.asarray(input_spectrum)
    wavelet = pywt.Wavelet('db8')
    threshold = 0.04

    def wave_(input_spectrum_row):
        maxlev = pywt.dwt_max_level(len(input_spectrum_row), wavelet.dec_len)
        coeffs = pywt.wavedec(input_spectrum_row, 'db8', level=maxlev)
        for level in range(1, len(coeffs)):
            # NOTE(review): the threshold uses the signed maximum, not the
            # maximum absolute value; a level whose coefficients are all
            # negative gets a negative threshold - confirm this is intended.
            coeffs[level] = pywt.threshold(coeffs[level], threshold * max(coeffs[level]))
        # NOTE(review): waverec may return one extra sample for odd-length
        # rows - confirm that downstream code tolerates this.
        return pywt.waverec(coeffs, 'db8')

    denoised_rows = [wave_(row) for row in spectra]
    if not denoised_rows:
        return np.empty((0,) + spectra.shape[1:])
    return np.vstack(denoised_rows)


# Transforms that take the raw ndarray and need no extra arguments.
# 'WVAE' is the historical (misspelled) key for the wavelet transform and is
# kept so existing callers continue to work.
_ARRAY_METHODS = {
    'MMS': MMS,
    'CT': CT,
    'MA': MA,
    'SG': SG,
    'MSC': MSC,
    'D1': D1,
    'D2': D2,
    'DT': DT,
    'WAVE': wave,
    'WVAE': wave,
}


# Generic preprocessing entry point
def Preprocessing(method, input_spectrum, scaler_save_path=DEFAULT_SCALER_SAVE_PATH):
    """Apply the preprocessing transform selected by ``method``.

    Args:
        method: one of "None", 'MMS', 'SS', 'CT', 'SNV', 'MA', 'SG', 'MSC',
            'D1', 'D2', 'DT', 'WAVE' (or the legacy spelling 'WVAE').
        input_spectrum: ``numpy.ndarray`` or ``pandas.DataFrame`` with one
            spectrum per row.  An ndarray is wrapped in a DataFrame first.
        scaler_save_path: where the fitted scaler is saved when ``method`` is
            'SS'.  Pass ``None`` to skip saving.

    Returns:
        The transformed spectra.  "None" and 'SNV' return a DataFrame; all
        other methods return an ndarray.  For an unknown method a message is
        printed and the untouched values are returned as an ndarray.
    """
    if isinstance(input_spectrum, np.ndarray):
        input_spectrum = pd.DataFrame(input_spectrum)

    if method == "None":
        return input_spectrum
    if method == 'SS':
        return SS(input_spectrum.values, save_path=scaler_save_path)
    if method == 'SNV':
        # SNV works on the DataFrame itself rather than on the raw values.
        return SNV(input_spectrum)

    transform = _ARRAY_METHODS.get(method)
    if transform is None:
        print("No such method of preprocessing!")
        return input_spectrum.values
    return transform(input_spectrum.values)