初始提交

This commit is contained in:
2026-02-25 09:42:51 +08:00
parent c25276c481
commit d84d886f35
182 changed files with 18438 additions and 0 deletions

View File

@@ -0,0 +1,111 @@
import lightgbm as lgb
import numpy as np
from scipy.linalg import qr
from progress.bar import Bar
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score
class SPA_acc:
    """Successive-projections variable selection scored by LightGBM accuracy.

    Candidate variable chains are built with column-pivoted QR
    decompositions (one chain per starting column). Every chain prefix is
    then scored by training a LightGBM multiclass model and measuring its
    accuracy, and the best-scoring prefix is returned.

    Args:
        device: Value passed to LightGBM as the ``'device'`` parameter.
            Defaults to ``'gpu'``; pass ``'cpu'`` on machines without a
            GPU-enabled LightGBM build.
        num_boost_round: Number of boosting rounds for every model trained.
    """

    # Class-level defaults keep instances usable even when ``__init__`` is
    # bypassed (e.g. created through ``__new__``).
    device = 'gpu'
    num_boost_round = 100

    def __init__(self, device='gpu', num_boost_round=100):
        self.device = device
        self.num_boost_round = num_boost_round

    def _lgb_params(self, y):
        """Return the LightGBM parameter dict for a multiclass fit on ``y``."""
        return {
            'objective': 'multiclass',  # multiclass objective function
            'boosting_type': 'gbdt',
            'metric': 'multi_logloss',  # multiclass cross-entropy loss
            'device': self.device,
            'verbosity': -1,
            # The class count is taken from the distinct training labels;
            # LightGBM expects the labels to be the integers 0..num_class-1.
            'num_class': len(np.unique(y)),
        }

    def _fit(self, X, y):
        """Train and return a LightGBM booster on ``X`` with labels ``y``."""
        train_data = lgb.Dataset(X, label=y)
        return lgb.train(
            self._lgb_params(y), train_data,
            num_boost_round=self.num_boost_round,
        )

    def _projections_qr(self, X, k: int, M: int) -> np.ndarray:
        """Return the first ``M`` column indices of the chain starting at ``k``.

        Column ``k`` is rescaled so that it has the largest norm, which
        forces the pivoted QR decomposition to select it first; the
        remaining pivots then follow in the order chosen by the
        decomposition.

        Args:
            X: 2-D data (DataFrame or array-like), one variable per column.
                It is not modified.
            k: Positional index of the starting column.
            M: Number of column indices to return.

        Returns:
            1-D integer array of positional column indices, ``k`` first.

        Raises:
            ValueError: If column ``k`` has zero (or NaN) squared norm, so
                it cannot be rescaled to lead the chain.
        """
        # Work on a private float copy: this keeps the caller's data intact
        # and makes ``k`` purely positional, whatever the column labels are.
        values = np.array(X, dtype=float)
        norms = np.sum(values ** 2, axis=0)
        if not norms[k] > 0:
            raise ValueError(
                f"column {k} has zero or undefined norm and cannot start a "
                "projection chain (is it constant?)"
            )
        norm_max = np.amax(norms)
        values[:, k] = values[:, k] * 2 * norm_max / norms[k]
        # Only the pivot order is needed, so skip building Q (mode='r').
        _, order = qr(values, mode='r', pivoting=True)
        return order[:M]

    def _validation(self, Xcal, ycal, var_sel, Xval=None, yval=None):
        """Train on the selected calibration columns and score accuracy.

        Args:
            Xcal: Calibration DataFrame (columns are selected with ``iloc``).
            ycal: Calibration labels.
            var_sel: Positional indices of the columns to use.
            Xval: Optional validation DataFrame.
            yval: Optional validation labels.

        Returns:
            Tuple ``(yhat, accuracy)``: predicted class indices and their
            accuracy. The validation set is used when both ``Xval`` and
            ``yval`` are given; otherwise the calibration set is re-used.
        """
        model = self._fit(Xcal.iloc[:, var_sel], ycal)
        if Xval is not None and yval is not None:
            X_eval, y_eval = Xval, yval
        else:
            X_eval, y_eval = Xcal, ycal
        proba = model.predict(X_eval.iloc[:, var_sel])
        yhat = np.argmax(proba, axis=1)  # most probable class per sample
        return yhat, accuracy_score(y_eval, yhat)

    def _plot_scree(self, accuracy_scree, n_selected, save_path=None):
        """Draw the accuracy scree curve and save it or show it."""
        fig = plt.figure()
        plt.rcParams['font.sans-serif'] = ['Times New Roman']
        plt.xlabel('Number of variables included in the model', fontsize=14)
        plt.ylabel('Accuracy', fontsize=14)
        # NOTE: the accuracy in the title is the maximum of the scree curve,
        # which may be reached with fewer variables than ``n_selected``.
        plt.title(f'Final number of selected variables: {n_selected} (Accuracy={accuracy_scree.max():.4f})', fontsize=16)
        # Entry i of the curve was obtained with i + 1 variables, so the
        # x-axis starts at 1 to agree with its label.
        n_vars = np.arange(1, len(accuracy_scree) + 1)
        best = int(np.argmax(accuracy_scree))
        plt.plot(n_vars, accuracy_scree, label='Accuracy Scree Plot')
        plt.scatter(n_vars[best], accuracy_scree[best], color='r', marker='s', label='Selected Point')
        plt.grid(True)
        plt.legend()
        if save_path:
            plt.savefig(save_path, bbox_inches='tight', dpi=300)
            print(f"图像已保存至: {save_path}")
            # Release the figure so repeated calls do not accumulate them.
            plt.close(fig)
        else:
            plt.show()

    def spa(self, Xcal, ycal, m_min=1, m_max=None, Xval=None, yval=None, autoscaling=1, save_path=None):
        """Select variables by successive projections and LightGBM accuracy.

        Phase 1 builds one chain of ``m_max`` variables per starting column.
        Phase 2 scores every chain prefix of length ``m_min..m_max`` and
        keeps the prefix with the highest accuracy. Finally the selected
        variables are ranked by LightGBM gain importance and an accuracy
        scree curve (top-1, top-2, ... variables) is computed and plotted.

        Args:
            Xcal: Calibration DataFrame, samples in rows.
            ycal: Calibration labels.
            m_min: Smallest subset size to evaluate.
            m_max: Largest subset size; defaults to ``min(N - 1, K)``.
            Xval: Optional validation DataFrame.
            yval: Optional validation labels.
            autoscaling: If truthy, columns are divided by their sample
                standard deviation before the projections.
            save_path: If given, the scree plot is saved there instead of
                being shown.

        Returns:
            Tuple ``(var_sel_phase2, ACCURACY_scree)``: positional indices
            of the selected columns and the scree-curve accuracies.

        Raises:
            ValueError: If ``1 <= m_min <= m_max <= K`` does not hold.
        """
        N, K = Xcal.shape
        m_max = min(N - 1, K) if m_max is None else m_max
        if not 1 <= m_min <= m_max <= K:
            raise ValueError(
                f"need 1 <= m_min <= m_max <= K, got m_min={m_min}, "
                f"m_max={m_max}, K={K}"
            )

        normalization_factor = Xcal.std(ddof=1, axis=0) if autoscaling else np.ones(K)
        Xcaln = (Xcal - Xcal.mean()) / normalization_factor

        # Phase 1: column k of SEL holds the chain that starts at variable k.
        SEL = np.zeros((m_max, K), dtype=int)
        with Bar('Projections :', max=K) as bar:
            for k in range(K):
                SEL[:, k] = self._projections_qr(Xcaln, k, m_max)
                bar.next()

        # Phase 2: ACCURACY[m, k] is the score of the first m variables of
        # chain k; rows that are never evaluated stay at -inf.
        ACCURACY = np.full((m_max + 1, K), -np.inf)
        with Bar('Evaluating subsets:', max=K * (m_max - m_min + 1)) as bar:
            for k in range(K):
                for m in range(m_min, m_max + 1):
                    _, accuracy = self._validation(Xcal, ycal, SEL[:m, k], Xval, yval)
                    ACCURACY[m, k] = accuracy
                    bar.next()

        m_sel = np.argmax(ACCURACY, axis=0)
        k_sel = np.argmax(np.max(ACCURACY, axis=0))
        var_sel_phase2 = SEL[:m_sel[k_sel], k_sel]

        # Final LightGBM training on the selected variables, used to rank
        # them by gain importance.
        model = self._fit(Xcal.iloc[:, var_sel_phase2], ycal)
        relev = model.feature_importance(importance_type='gain')
        index_decreasing_relev = np.argsort(-relev)

        ACCURACY_scree = np.empty(len(var_sel_phase2))
        for i in range(len(var_sel_phase2)):
            var_sel = var_sel_phase2[index_decreasing_relev[:i + 1]]
            _, accuracy = self._validation(Xcal, ycal, var_sel, Xval, yval)
            ACCURACY_scree[i] = accuracy

        # Plotting
        self._plot_scree(ACCURACY_scree, len(var_sel_phase2), save_path)
        return var_sel_phase2, ACCURACY_scree

    def __repr__(self):
        return (
            f"{type(self).__name__}(device={self.device!r}, "
            f"num_boost_round={self.num_boost_round!r})"
        )