158 lines
5.5 KiB
Python
158 lines
5.5 KiB
Python
import numpy as np
|
|
from scipy import signal
|
|
from sklearn.linear_model import LinearRegression
|
|
from sklearn.preprocessing import MinMaxScaler, StandardScaler
|
|
import pandas as pd
|
|
import pywt
|
|
from copy import deepcopy
|
|
import joblib # 用于保存和加载模型
|
|
# 最大最小值归一化
|
|
def MMS(input_spectrum):
    """Min-max normalise the input with scikit-learn's ``MinMaxScaler``.

    A fresh scaler with default settings is fitted on ``input_spectrum`` and
    applied to that same data in one step.

    Args:
        input_spectrum: Data accepted by ``MinMaxScaler.fit_transform``.

    Returns:
        The transformed data returned by ``fit_transform``.
    """
    return MinMaxScaler().fit_transform(input_spectrum)
|
|
|
|
# 标准化
|
|
def SS(input_spectrum, save_path=None):
    """Standardise the input with scikit-learn's ``StandardScaler``.

    A new scaler is fitted on ``input_spectrum`` and applied to it.  When a
    truthy ``save_path`` is given, the fitted scaler object is additionally
    written there with ``joblib.dump`` and a confirmation line is printed.

    Args:
        input_spectrum: Data accepted by ``StandardScaler.fit_transform``.
        save_path: Optional destination for the fitted scaler; nothing is
            saved when it is ``None`` or otherwise falsy.

    Returns:
        The transformed data returned by ``fit_transform``.
    """
    standardizer = StandardScaler()
    scaled = standardizer.fit_transform(input_spectrum)

    # Nothing to persist: hand the scaled data straight back.
    if not save_path:
        return scaled

    joblib.dump(standardizer, save_path)
    print(f"Scaler parameters saved to {save_path}")
    return scaled
|
|
|
|
# 均值中心化
|
|
def CT(input_spectrum):
    """Mean-centre every spectrum (row) of the input.

    The mean of each row is subtracted from that row.  The input is never
    modified; a new array is returned.

    Non-floating input (integers, booleans) is promoted to ``float64`` before
    centring so the fractional part of the result is kept rather than being
    truncated when written back into an integer array.  Floating-point input
    keeps its dtype.

    Args:
        input_spectrum: Array-like of spectra, one spectrum per row.

    Returns:
        numpy.ndarray with the same shape as the input in which every row has
        had its own mean removed.
    """
    spectra = np.asarray(input_spectrum)
    if not np.issubdtype(spectra.dtype, np.inexact):
        spectra = spectra.astype(np.float64)

    # Nothing to centre; also avoids an ambiguous reshape on empty data.
    if spectra.size == 0:
        return spectra.copy()

    # Collapse everything behind the first axis so each leading-axis entry is
    # centred on the mean of all of its own values.
    flat = spectra.reshape(spectra.shape[0], -1)
    centred = flat - flat.mean(axis=1, keepdims=True)
    return centred.reshape(spectra.shape)
|
|
|
|
# 标准正态变换
|
|
def SNV(input_spectrum):
    """Apply the standard normal variate transform to each row.

    Every row has its own mean subtracted and is then divided by its own
    standard deviation (as computed by ``DataFrame.std``).

    Args:
        input_spectrum: pandas DataFrame with one spectrum per row.

    Returns:
        pandas DataFrame of the same shape holding the transformed rows.

    Raises:
        ValueError: If ``input_spectrum`` is not a pandas DataFrame.
    """
    if not isinstance(input_spectrum, pd.DataFrame):
        raise ValueError("Input spectrum must be a Pandas DataFrame")

    row_mean = input_spectrum.mean(axis=1)
    # A standard deviation of exactly zero is swapped for 1 so the division
    # below does not divide by zero.
    row_std = input_spectrum.std(axis=1).replace(0, 1)

    centred = input_spectrum.sub(row_mean, axis=0)
    return centred.div(row_std, axis=0)
|
|
|
|
# 移动平均平滑
|
|
def MA(input_spectrum, WSZ=11):
    """Smooth each spectrum (row) with a centred moving average.

    The interior of each row is the plain ``WSZ``-point average.  Towards
    both ends, where a full window does not fit, progressively shorter odd
    windows (1, 3, 5, ...) are used so the output keeps the input length.

    Non-floating input is promoted to ``float64`` so the averages are not
    truncated when stored; floating-point input keeps its dtype.  The input
    itself is never modified.

    Args:
        input_spectrum: 2-D array-like, one spectrum per row.
        WSZ: Window size; must be a positive odd integer no larger than the
            number of points per spectrum.

    Returns:
        numpy.ndarray with the same shape as the input holding the smoothed
        spectra.

    Raises:
        ValueError: If ``WSZ`` is not a positive odd number, the input is not
            2-D, or ``WSZ`` exceeds the spectrum length.
    """
    if WSZ < 1 or WSZ % 2 == 0:
        raise ValueError(f"WSZ must be a positive odd integer, got {WSZ}")

    spectra = np.asarray(input_spectrum)
    if spectra.ndim != 2:
        raise ValueError("input_spectrum must be 2-D (one spectrum per row)")
    if np.issubdtype(spectra.dtype, np.inexact):
        out_dtype = spectra.dtype
    else:
        out_dtype = np.float64
    spectra = spectra.astype(out_dtype)  # astype copies, input stays intact

    n_rows, n_points = spectra.shape
    if n_rows and WSZ > n_points:
        raise ValueError(
            f"WSZ ({WSZ}) is larger than the spectrum length ({n_points})"
        )

    kernel = np.ones(WSZ, dtype=int)
    # Sizes of the shrinking windows used at the two edges: 1, 3, ..., WSZ-2.
    edge_sizes = np.arange(1, WSZ - 1, 2)

    smoothed = np.empty_like(spectra)
    for i, row in enumerate(spectra):
        core = np.convolve(row, kernel, 'valid') / WSZ
        # Every second cumulative sum is the sum of an odd-sized edge window.
        head = np.cumsum(row[:WSZ - 1])[::2] / edge_sizes
        tail = (np.cumsum(row[:-WSZ:-1])[::2] / edge_sizes)[::-1]
        smoothed[i] = np.concatenate((head, core, tail))
    return smoothed
|
|
|
|
# Savitzky-Golay平滑滤波
|
|
def SG(input_spectrum, w=15, p=2):
    """Smooth the input with a Savitzky-Golay filter.

    Thin wrapper around ``scipy.signal.savgol_filter``; every other filter
    option is left at SciPy's default.

    Args:
        input_spectrum: Data passed to ``savgol_filter``.
        w: Filter window length.
        p: Order of the fitted polynomial.

    Returns:
        The filtered data returned by ``savgol_filter``.
    """
    return signal.savgol_filter(input_spectrum, window_length=w, polyorder=p)
|
|
|
|
# 一阶导数
|
|
def D1(input_spectrum):
    """Return the first-order difference of each spectrum (row).

    The data is converted to ``float64`` before differencing, so unsigned
    integer input yields true negative differences instead of wrapped-around
    values.

    Args:
        input_spectrum: 2-D array-like of shape ``(n, p)``, one spectrum per
            row.

    Returns:
        numpy.ndarray of dtype ``float64`` and shape ``(n, p - 1)`` where
        column ``j`` is ``x[:, j + 1] - x[:, j]``.

    Raises:
        ValueError: If the input is not 2-D.
    """
    spectra = np.asarray(input_spectrum, dtype=float)
    if spectra.ndim != 2:
        raise ValueError("input_spectrum must be 2-D (one spectrum per row)")
    return np.diff(spectra, axis=1)
|
|
|
|
# 二阶导数
|
|
def D2(input_spectrum):
    """Return the second-order difference of each spectrum (row).

    Computed directly with ``numpy.diff`` on a ``float64`` view of the data,
    without intermediate DataFrame copies.

    Args:
        input_spectrum: 2-D array-like of shape ``(n, p)``, one spectrum per
            row.

    Returns:
        numpy.ndarray of dtype ``float64`` and shape ``(n, p - 2)`` where
        column ``j`` is ``x[:, j + 2] - 2 * x[:, j + 1] + x[:, j]``.

    Raises:
        ValueError: If the input is not 2-D.
    """
    spectra = np.asarray(input_spectrum, dtype=float)
    if spectra.ndim != 2:
        raise ValueError("input_spectrum must be 2-D (one spectrum per row)")
    return np.diff(spectra, n=2, axis=1)
|
|
|
|
# 趋势校正
|
|
def DT(input_spectrum):
    """Remove the least-squares linear trend from each spectrum (row).

    For every row a straight line is fitted against the sample index and
    subtracted from the row.  The work is done on a ``float64`` copy, so the
    caller's data is left untouched and integer input is not truncated.

    Args:
        input_spectrum: 2-D array-like, one spectrum per row.

    Returns:
        numpy.ndarray of dtype ``float64`` with the same shape as the input
        holding the detrended spectra.

    Raises:
        ValueError: If the input is not 2-D.
    """
    spectra = np.array(input_spectrum, dtype=np.float64)
    if spectra.ndim != 2:
        raise ValueError("input_spectrum must be 2-D (one spectrum per row)")

    # Nothing to fit on empty data; return the (already copied) array as is.
    if spectra.size == 0:
        return spectra

    # One vectorised least-squares line fit per row along the wavelength axis.
    return signal.detrend(spectra, axis=1, type='linear')
|
|
|
|
# 多元散射校正
|
|
def MSC(input_spectrum):
    """Apply multiplicative scatter correction to each spectrum (row).

    The column-wise mean of the input serves as the reference spectrum.  Each
    row is regressed on that reference with ``LinearRegression``; the fitted
    intercept is then subtracted from the row and the result is divided by
    the fitted slope.

    Args:
        input_spectrum: 2-D numpy array of shape ``(n, p)``, one spectrum per
            row.

    Returns:
        numpy.ndarray of shape ``(n, p)`` holding the corrected spectra.
    """
    n_rows, n_cols = input_spectrum.shape
    corrected = np.ones((n_rows, n_cols))
    reference = np.mean(input_spectrum, axis=0)

    for idx in range(n_rows):
        spectrum = input_spectrum[idx, :]
        regression = LinearRegression()
        regression.fit(reference.reshape(-1, 1), spectrum.reshape(-1, 1))
        slope, offset = regression.coef_, regression.intercept_
        # NOTE(review): no guard against a fitted slope of zero here.
        corrected[idx, :] = (spectrum - offset) / slope

    return corrected
|
|
|
|
# 小波变换
|
|
def wave(input_spectrum, wavelet='db8', threshold=0.04):
    """Denoise each spectrum (row) by thresholding its wavelet coefficients.

    Every row is decomposed with ``pywt.wavedec`` to the maximum level that
    ``pywt.dwt_max_level`` allows, each detail-coefficient band is passed
    through ``pywt.threshold`` and the row is rebuilt with ``pywt.waverec``.

    The result is always 2-D, also when the input holds a single row, and an
    input without rows yields an empty array instead of ``None``.

    Args:
        input_spectrum: 2-D array-like, one spectrum per row.
        wavelet: Name of the wavelet to use.
        threshold: Factor applied to the largest coefficient of a band to
            obtain that band's threshold value.

    Returns:
        numpy.ndarray with one reconstructed spectrum per row.
        NOTE(review): ``pywt.waverec`` may return one more sample than the
        input for odd-length rows -- confirm whether callers rely on the
        output width.
    """
    spectra = np.asarray(input_spectrum)
    # Built once; only its filter length is needed to pick the level.
    filter_len = pywt.Wavelet(wavelet).dec_len

    def _denoise_row(row):
        """Decompose, threshold the detail bands and reconstruct one row."""
        max_level = pywt.dwt_max_level(len(row), filter_len)
        coeffs = pywt.wavedec(row, wavelet, level=max_level)
        # Band 0 holds the approximation coefficients and is left untouched.
        for band in range(1, len(coeffs)):
            # NOTE(review): the signed maximum is used, as before; confirm
            # that the maximum magnitude was not intended.
            limit = threshold * np.max(coeffs[band])
            coeffs[band] = pywt.threshold(coeffs[band], limit)
        return pywt.waverec(coeffs, wavelet)

    rows = [_denoise_row(row) for row in spectra]
    if not rows:
        return np.empty((0,) + spectra.shape[1:])
    # Stack once at the end instead of growing the array row by row.
    return np.vstack(rows)
|
|
|
|
# 通用预处理函数
|
|
def Preprocessing(
    method,
    input_spectrum,
    scaler_save_path=r'E:\code\plastic\plastic20260224\plastic\plastic\output\20260224\modelsave\scaler_params.pkl',
):
    """Run the preprocessing step selected by ``method`` on the spectra.

    Supported ``method`` values: ``"None"`` (no processing), ``'MMS'``,
    ``'SS'``, ``'CT'``, ``'SNV'``, ``'MA'``, ``'SG'``, ``'MSC'``, ``'D1'``,
    ``'D2'``, ``'DT'`` and the wavelet filter, reachable as ``'WAVE'`` or
    under its historical spelling ``'WVAE'``.

    Args:
        method: Name of the preprocessing step.
        input_spectrum: Spectra, one per row.  A numpy array is wrapped in a
            DataFrame; for every method other than ``"None"`` any other
            non-DataFrame input is wrapped as well.
        scaler_save_path: Where the ``'SS'`` step stores its fitted scaler.
            The default is the location that used to be hard-coded; pass a
            different path to save elsewhere.

    Returns:
        The processed spectra.  ``"None"`` and ``'SNV'`` return a DataFrame;
        the other steps return whatever the selected function returns.  For
        an unknown ``method`` a message is printed and the unprocessed values
        are returned as a numpy array.
    """
    if isinstance(input_spectrum, np.ndarray):
        input_spectrum = pd.DataFrame(input_spectrum)

    if method == "None":
        return input_spectrum

    # The remaining steps need ``.values``, so accept other array-likes too.
    if not isinstance(input_spectrum, pd.DataFrame):
        input_spectrum = pd.DataFrame(input_spectrum)
    values = input_spectrum.values

    # Lazy dispatch table: only the selected step is looked up and executed.
    steps = {
        'MMS': lambda: MMS(values),
        'SS': lambda: SS(values, scaler_save_path),
        'CT': lambda: CT(values),
        'SNV': lambda: SNV(input_spectrum),
        'MA': lambda: MA(values),
        'SG': lambda: SG(values),
        'MSC': lambda: MSC(values),
        'D1': lambda: D1(values),
        'D2': lambda: D2(values),
        'DT': lambda: DT(values),
        'WVAE': lambda: wave(values),  # historical spelling, kept for callers
        'WAVE': lambda: wave(values),
    }

    step = steps.get(method) if isinstance(method, str) else None
    if step is None:
        print("No such method of preprocessing!")
        return values
    return step()
|