初始提交

This commit is contained in:
2026-02-25 09:42:51 +08:00
parent c25276c481
commit d84d886f35
182 changed files with 18438 additions and 0 deletions

View File

@ -0,0 +1,176 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import copy
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
def PC_Cross_Validation(X, y, pc, cv):
    '''
    Evaluate PLS models with 1..pc latent components via K-fold cross-validation.

    X : spectral matrix (DataFrame), shape n x m
    y : concentration values (Series)
    pc: maximum number of latent components to try
    cv: number of cross-validation folds
    return :
        RMSECV: RMSECV for each component count
        rindex: index of the best component count (0-based, i.e. best count - 1)
    '''
    # KFold with shuffle=False is deterministic, so the folds can be
    # materialized once and reused for every component count.
    folds = list(KFold(n_splits=cv).split(X))
    RMSECV = []
    for n_comp in range(1, pc + 1):
        fold_errors = []
        for train_idx, test_idx in folds:
            model = PLSRegression(n_components=n_comp)
            model.fit(X.iloc[train_idx], y.iloc[train_idx])
            predictions = model.predict(X.iloc[test_idx])
            fold_errors.append(np.sqrt(mean_squared_error(y.iloc[test_idx], predictions)))
        RMSECV.append(np.mean(fold_errors))
    rindex = np.argmin(RMSECV)
    return RMSECV, rindex
def Cross_Validation(X, y, pc, cv):
    '''
    Mean RMSE of a single PLS model under K-fold cross-validation.

    X : spectral matrix (DataFrame), shape n x m
    y : concentration values (Series)
    pc: number of latent components to use
    cv: number of cross-validation folds
    return :
        mean RMSE over the cv folds (RMSECV)
    '''
    fold_rmse = []
    for train_idx, test_idx in KFold(n_splits=cv).split(X):
        model = PLSRegression(n_components=pc)
        model.fit(X.iloc[train_idx], y.iloc[train_idx])
        predictions = model.predict(X.iloc[test_idx])
        fold_rmse.append(np.sqrt(mean_squared_error(y.iloc[test_idx], predictions)))
    return np.mean(fold_rmse)
def CARS_Cloud(X, y, N=50, f=20, cv=10, save_fig=False, save_path=None):
    '''
    Competitive Adaptive Reweighted Sampling (CARS) wavelength selection.

    X : spectral matrix, shape (m samples, n wavelengths) — must support
        numpy-style indexing (ndarray expected; see note below)
    y : concentration values (indexable by sample, e.g. ndarray)
    N : number of Monte Carlo iterations
    f : maximum number of PLS latent components
    cv : number of cross-validation folds
    save_fig : whether to save the diagnostic figure
    save_path : figure path; must be provided when save_fig is True
    return :
        OptWave : indices of the selected wavelengths
    '''
    p = 0.8  # fraction of samples drawn into each calibration subset
    m, n = X.shape
    # Parameters of the exponentially decaying retention ratio r_i = u * exp(-k * i)
    u = np.power((n / 2), (1 / (N - 1)))
    k = (1 / (N - 1)) * np.log(n / 2)
    cal_num = np.round(m * p)
    b2 = np.arange(n)  # current wavelength ranking (starts as identity)
    x = X  # NOTE(review): no ndarray conversion happens here despite the original comment;
    y = y  # X and y are used as-is, so a DataFrame/Series input would fail below — confirm callers
    D = np.vstack((np.array(b2).reshape(1, -1), x))  # row 0 carries the wavelength indices
    WaveData = []
    WaveNum = []
    RMSECV = []
    r = []
    for i in range(1, N + 1):
        r.append(u * np.exp(-1 * k * i))
        wave_num = int(np.round(r[i - 1] * n))  # wavelengths retained this iteration
        WaveNum = np.hstack((WaveNum, wave_num))
        cal_index = np.random.choice(np.arange(m), size=int(cal_num), replace=False)
        wave_index = b2[:wave_num].reshape(1, -1)[0]  # top-ranked wavelengths survive
        # Use np.ix_ to index rows and columns simultaneously
        xcal = x[np.ix_(cal_index, wave_index)]  # calibration subset (rows x surviving columns)
        ycal = y[cal_index]  # matching targets
        # Flatten ycal to one dimension
        ycal = ycal.ravel()
        x = x[:, wave_index]  # shrink x to the surviving wavelengths
        D = D[:, wave_index]  # keep D's index row aligned with x
        d = D[0, :].reshape(1, -1)  # global indices of this iteration's wavelengths
        wnum = n - wave_num
        if wnum > 0:
            # pad with -1 so every iteration's record has a fixed length n
            d = np.hstack((d, np.full((1, wnum), -1)))
        if len(WaveData) == 0:
            WaveData = d
        else:
            WaveData = np.vstack((WaveData, d.reshape(1, -1)))
        if wave_num < f:
            f = wave_num  # cannot use more latent components than variables
        pls = PLSRegression(n_components=f)
        pls.fit(xcal, ycal)
        beta = pls.coef_
        # Handle the coef_ orientation difference across sklearn versions
        if beta.shape[0] == 1:  # newer sklearn: shape (1, n_features)
            b = np.abs(beta[0])
            coeff = beta[0, b2]
        else:  # older sklearn: shape (n_features, 1)
            b = np.abs(beta[:, 0])
            coeff = beta[b2, 0]
        b2 = np.argsort(-b, axis=0)  # re-rank wavelengths by |coefficient|, descending
        coef = copy.deepcopy(beta)
        coeff = coeff[b2, :].reshape(len(b2), -1) if False else coef[b2, :].reshape(len(b2), -1)  # NOTE(review): overwrites the coeff computed above; coeff is never used afterwards
        rmsecv, rindex = PC_Cross_Validation(pd.DataFrame(xcal), pd.Series(ycal), f, cv)
        RMSECV.append(Cross_Validation(pd.DataFrame(xcal), pd.Series(ycal), rindex + 1, cv))
    # Rebuild, per iteration, a length-n indicator/record of which wavelengths survived
    WAVE = []
    for i in range(WaveData.shape[0]):
        wd = WaveData[i, :]
        WD = np.ones((len(wd)))
        for j in range(len(wd)):
            ind = np.where(wd == j)
            if len(ind[0]) == 0:
                WD[j] = 0  # wavelength j was dropped by iteration i
            else:
                WD[j] = wd[ind[0]]
        if len(WAVE) == 0:
            WAVE = copy.deepcopy(WD)
        else:
            WAVE = np.vstack((WAVE, WD.reshape(1, -1)))
    MinIndex = np.argmin(RMSECV)  # iteration with the lowest RMSECV
    Optimal = WAVE[MinIndex, :]
    boindex = np.where(Optimal != 0)
    OptWave = boindex[0]  # selected wavelength indices
    plt.figure(figsize=(12, 10))
    # Use the Times New Roman font for the diagnostic plots
    plt.rcParams['font.sans-serif'] = ['Times New Roman']
    plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
    fonts = 20
    plt.subplot(211)
    plt.xlabel('Monte Carlo Iterations', fontsize=fonts)
    plt.ylabel('Number of Selected Wavelengths', fontsize=fonts)
    plt.title('Optimal Iteration: ' + str(MinIndex), fontsize=fonts)
    plt.plot(np.arange(N), WaveNum)
    plt.subplot(212)
    plt.xlabel('Monte Carlo Iterations', fontsize=fonts)
    plt.ylabel('RMSECV', fontsize=fonts)
    plt.plot(np.arange(N), RMSECV)
    # Save the figure if requested
    if save_fig:
        plt.savefig(save_path)  # NOTE(review): crashes if save_path is None — validate upstream
        print(f"The figure has been saved as {save_path}")
    # plt.show()
    return OptWave

View File

@ -0,0 +1,59 @@
from deap import base, creator, tools, algorithms
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
def GA(X, y, n_generations=20, population_size=50, crossover_prob=0.7, mutation_prob=0.2):
    """
    Genetic-algorithm feature selection; returns the indices of the chosen features.

    Parameters:
        X (ndarray): feature matrix
        y (ndarray): labels
        n_generations (int): number of generations to evolve
        population_size (int): individuals per generation
        crossover_prob (float): crossover probability
        mutation_prob (float): mutation probability
    Returns:
        list: indices of the selected features
    """
    # Fitness (maximize CV accuracy) and individual (bit vector) types
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMax)
    toolbox = base.Toolbox()
    toolbox.register("attr_bool", lambda: np.random.randint(0, 2))
    toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=X.shape[1])
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    def evaluate(individual):
        # Fitness = mean 5-fold CV accuracy of a random forest on the chosen columns
        mask = [i for i, bit in enumerate(individual) if bit == 1]
        if not mask:
            return 0,  # no features selected -> worst possible fitness
        clf = RandomForestClassifier(random_state=42)
        return cross_val_score(clf, X[:, mask], y, cv=5).mean(),

    toolbox.register("evaluate", evaluate)
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
    toolbox.register("select", tools.selTournament, tournsize=3)
    # Evolve the population with the canonical simple EA loop
    final_pop, _ = algorithms.eaSimple(
        toolbox.population(n=population_size), toolbox,
        cxpb=crossover_prob, mutpb=mutation_prob,
        ngen=n_generations, verbose=False)
    # Decode the fittest individual back into column indices
    champion = tools.selBest(final_pop, k=1)[0]
    return [i for i, bit in enumerate(champion) if bit == 1]

View File

@ -0,0 +1,41 @@
"""
-*- coding: utf-8 -*-
@Time :2022/04/12 17:10
@Author : Pengyou FU
@blogs : https://blog.csdn.net/Echo_Code?spm=1000.2115.3001.5343
@github : https://github.com/FuSiry/OpenSA
@WeChat : Fu_siry
@LicenseApache-2.0 license
"""
from sklearn import linear_model
import numpy as np
def Lar(X, y, nums=40):
    """
    Select important wavelengths with LARS (Least Angle Regression).

    Parameters:
        X : np.ndarray, predictor matrix (input data)
        y : np.ndarray, labels (target values)
        nums : int, number of feature points to keep (default 40)
    Returns:
        np.ndarray, sorted indices of the selected wavelengths
    """
    # Fit a LARS model and rank features by |coefficient|
    model = linear_model.Lars()
    model.fit(X, y)
    importance = np.abs(model.coef_)
    # Take the nums largest coefficients, then sort the indices ascending
    # so the output order is stable
    top_indices = np.argsort(importance)[-nums:][::-1]
    return np.sort(top_indices)

View File

@ -0,0 +1,49 @@
import pymrmr
import pandas as pd
class MRMRFeatureSelection:
    """Thin wrapper around pymrmr for mRMR (min-redundancy max-relevance) selection."""

    def __init__(self, X, y):
        """
        :param X: feature matrix (DataFrame), one column per feature.
        :param y: target variable (Series), aligned with X.
        """
        self.X = X
        self.y = y
        self.selected_features = None  # populated by select_features()

    def select_features(self, k=18, method='MIQ'):
        """
        Run mRMR feature selection.

        :param k: number of features to select.
        :param method: mRMR criterion, 'MIQ' or 'MRMR'.
        :return: list of selected features.
        """
        # pymrmr expects a single frame with the target as the first column
        combined = pd.concat([self.y, self.X], axis=1)
        self.selected_features = pymrmr.mRMR(combined, method, k)
        return self.selected_features

    def get_selected_features(self):
        """Return the features from the last select_features() call, or None."""
        return self.selected_features

    def get_selected_feature_names(self):
        """Return the selected feature column names, or None if selection has not run."""
        if self.selected_features is None:
            return None
        return self.selected_features

View File

@ -0,0 +1,24 @@
"""
-*- coding: utf-8 -*-
@Time :2022/04/12 17:10
@Author : Pengyou FU
@blogs : https://blog.csdn.net/Echo_Code?spm=1000.2115.3001.5343
@github : https://github.com/FuSiry/OpenSA
@WeChat : Fu_siry
@LicenseApache-2.0 license
"""
from sklearn.decomposition import PCA
def Pca(X, nums=20):
    """
    Reduce spectral data with principal component analysis.

    :param X: raw spectrum data, shape (n_samples, n_features)
    :param nums: number of principal components to retain
    :return: X_reduction, spectral data after dimensionality reduction
    """
    model = PCA(n_components=nums)  # keep the first `nums` components
    model.fit(X)
    return model.transform(X)

View File

@ -0,0 +1,88 @@
import numpy as np
from sklearn.neighbors import NearestNeighbors
class ReliefF:
    """ReliefF feature scoring based on nearest same-class / other-class neighbors."""

    def __init__(self, n_neighbors=20, n_features_to_keep=20):
        """
        :param n_neighbors: neighbors examined per sample.
        :param n_features_to_keep: number of top-scoring features to return.
        """
        self.n_neighbors = n_neighbors
        self.n_features_to_keep = n_features_to_keep
        self.feature_scores = None  # accumulated score per feature
        self.top_features = None  # indices of the highest-scoring features

    def fit(self, X, y):
        """
        Score every feature on (X, y) and return the indices of the best ones.

        :param X: feature matrix, shape (n_samples, n_features).
        :param y: class labels.
        :return: indices of the selected features.
        """
        n_samples, n_features = X.shape
        self.feature_scores = np.zeros(n_features)
        # Ask for k+1 neighbors because each sample is its own nearest neighbor
        knn = NearestNeighbors(n_neighbors=self.n_neighbors + 1).fit(X)
        _, neighbor_idx = knn.kneighbors(X)
        scale = self.n_neighbors * n_samples  # normalization for each contribution
        for i in range(n_samples):
            label = y[i]
            # Split neighbors into same-class (hits) and other-class (misses),
            # skipping index 0 which is the sample itself
            hits = [X[j] for j in neighbor_idx[i][1:] if y[j] == label]
            misses = [X[j] for j in neighbor_idx[i][1:] if y[j] != label]
            # Penalize features that differ among hits, reward those that
            # differ among misses
            for f in range(n_features):
                for hit in hits:
                    self.feature_scores[f] -= (X[i, f] - hit[f]) ** 2 / scale
                for miss in misses:
                    self.feature_scores[f] += (X[i, f] - miss[f]) ** 2 / scale
        self.top_features = np.argsort(self.feature_scores)[-self.n_features_to_keep:]
        return self.top_features

    def fit_transform(self, X, y):
        """Fit and return the selected feature indices in a single call."""
        return self.fit(X, y)
def multi_scale_relieff_stratified(X, y, segment_size=100, n_subsegments=20, n_features_per_subsegment=5):
    """
    Stratified multi-scale ReliefF selection that covers every wavelength segment.

    :param X: feature matrix, shape (n_samples, n_features).
    :param y: class labels.
    :param segment_size: width (in features) of each wavelength segment.
    :param n_subsegments: number of sub-regions per segment.
    :param n_features_per_subsegment: features selected per sub-region.
    :return: sorted, de-duplicated global indices of the selected features.
    """
    selected_features = []
    # Walk the spectrum one segment at a time
    for i in range(0, X.shape[1], segment_size):
        segment_X = X[:, i:i + segment_size]
        # Guard against a zero step when segment_size < n_subsegments
        subsegment_size = max(1, segment_size // n_subsegments)
        # Iterate over the ACTUAL segment width: the final segment may be
        # narrower than segment_size, and the original loop ran past its end,
        # producing empty sub-segments
        for j in range(0, segment_X.shape[1], subsegment_size):
            subsegment_X = segment_X[:, j:j + subsegment_size]
            relief = ReliefF(n_neighbors=10, n_features_to_keep=n_features_per_subsegment)
            subsegment_selected = relief.fit_transform(subsegment_X, y)
            # Convert sub-segment-local indices back to global wavelength indices
            selected_features.extend(subsegment_selected + i + j)
    # De-duplicate (np.unique also sorts)
    return np.unique(selected_features)

View File

@ -0,0 +1,116 @@
import scipy.stats
import numpy as np
from scipy.linalg import qr, inv, pinv
import scipy.stats
from progress.bar import Bar
from matplotlib import pyplot as plt
class SPA:
    """Successive Projections Algorithm (SPA) for variable selection.

    Phase 1 builds candidate variable chains with QR projections, phase 2
    scores each chain by regression PRESS, phase 3 prunes the winning chain
    with an F-test on the scree of PRESS values.
    """

    def _projections_qr(self, X, k, M):
        """Return the first M column indices of the projection chain seeded at column k.

        Column k is inflated so the column-pivoted QR picks it first; the pivot
        order then gives the successive-projection chain.
        """
        X_projected = X.copy()
        norms = np.sum((X ** 2), axis=0)
        norm_max = np.amax(norms)
        # Boost column k so it becomes the first pivot
        X_projected.iloc[:, k] = X_projected.iloc[:, k] * 2 * norm_max / norms[k]
        _, __, order = qr(X_projected.to_numpy(), 0, pivoting=True)
        return order[:M].T

    def _validation(self, Xcal, ycal, var_sel, Xval=None, yval=None):
        """Validate an MLR model restricted to the columns in var_sel.

        Uses the separate validation set when given, otherwise leave-one-out
        on the calibration set. Returns (predictions, residuals).
        """
        N = Xcal.shape[0]
        NV = Xval.shape[0] if Xval is not None else 0
        yhat, e = None, None
        if NV > 0:
            # Fit on the full calibration set, predict the validation set.
            # A column of ones provides the intercept term.
            Xcal_ones = np.hstack([np.ones((N, 1)), Xcal.iloc[:, var_sel].to_numpy()])
            b = np.linalg.lstsq(Xcal_ones, ycal, rcond=None)[0]
            Xval_ones = np.hstack([np.ones((NV, 1)), Xval.iloc[:, var_sel].to_numpy()])
            yhat = Xval_ones.dot(b)
            e = yval - yhat
        else:
            # Leave-one-out cross-validation on the calibration set
            yhat = np.zeros((N, 1))
            for i in range(N):
                cal = np.hstack([np.arange(i), np.arange(i + 1, N)])
                X = Xcal.iloc[cal, var_sel]
                y = ycal.iloc[cal]
                X_ones = np.hstack([np.ones((N - 1, 1)), X.to_numpy()])
                b = np.linalg.lstsq(X_ones, y, rcond=None)[0]
                xtest = Xcal.iloc[i, var_sel].to_numpy()
                yhat[i] = np.hstack([1, xtest]).dot(b)
            e = ycal.to_numpy() - yhat
        return yhat, e

    def spa(self, Xcal, ycal, m_min=1, m_max=None, Xval=None, yval=None, autoscaling=1, save_path=None):
        """Run SPA and plot the RMSE scree curve.

        Xcal/Xval must be DataFrames (indexed with .iloc internally).
        Returns (var_sel, var_sel_phase2): the final pruned selection and the
        full phase-2 chain.
        """
        N, K = Xcal.shape
        m_max = min(N - 1, K) if m_max is None else m_max
        # Optional autoscaling by the column standard deviation
        normalization_factor = Xcal.std(ddof=1, axis=0) if autoscaling else np.ones(K)
        Xcaln = (Xcal - Xcal.mean()) / normalization_factor
        # Phase 1: one projection chain per starting column
        SEL = np.zeros((m_max, K))
        with Bar('Projections :', max=K) as bar:
            for k in range(K):
                SEL[:, k] = self._projections_qr(Xcaln, k, m_max)
                bar.next()
        # Phase 2: PRESS for every (chain length, starting column) pair
        PRESS = np.full((m_max + 1, K), np.inf)
        with Bar('Evaluating subsets:', max=K * (m_max - m_min + 1)) as bar:
            for k in range(K):
                for m in range(m_min, m_max + 1):
                    var_sel = SEL[:m, k].astype(int)
                    _, e = self._validation(Xcal, ycal, var_sel, Xval, yval)
                    PRESS[m, k] = e.T @ e
                    bar.next()
        # Best chain length per start, then the best start overall
        m_sel = np.argmin(PRESS, axis=0)
        k_sel = np.argmin(np.min(PRESS, axis=0))
        var_sel_phase2 = SEL[:m_sel[k_sel], k_sel].astype(int)
        # Phase 3: rank the winning chain's variables by MLR relevance |b|*std
        Xcal2 = np.hstack([np.ones((N, 1)), Xcal.iloc[:, var_sel_phase2].to_numpy()])
        b = np.linalg.lstsq(Xcal2, ycal, rcond=None)[0]
        std_deviation = Xcal2.std(ddof=1, axis=0)
        relev = np.abs(b * std_deviation)[1:]  # drop the intercept column
        index_decreasing_relev = np.argsort(-relev)
        # PRESS scree over nested prefixes of decreasing relevance
        PRESS_scree = np.empty(len(var_sel_phase2))
        for i in range(len(var_sel_phase2)):
            var_sel = var_sel_phase2[index_decreasing_relev[:i + 1]]
            _, e = self._validation(Xcal, ycal, var_sel, Xval, yval)
            PRESS_scree[i] = np.conj(e).T @ e
        RMSEP_scree = np.sqrt(PRESS_scree / len(e))  # `e` is from the last loop iteration
        # F-test: smallest subset whose PRESS is statistically indistinguishable
        # from the minimum at significance alpha
        alpha = 0.25
        dof = len(e)
        fcrit = scipy.stats.f.ppf(1 - alpha, dof, dof)
        PRESS_crit = np.min(PRESS_scree) * fcrit
        i_crit = np.min(np.nonzero(PRESS_scree < PRESS_crit))
        i_crit = max(m_min, i_crit)
        var_sel = var_sel_phase2[index_decreasing_relev[:i_crit]]
        # Plot the scree curve
        plt.figure()
        # Use the Times New Roman font
        plt.rcParams['font.sans-serif'] = ['Times New Roman']
        plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
        # Title, axis labels and grid
        plt.xlabel('Number of variables included in the model', fontsize=14)
        plt.ylabel('RMSE', fontsize=14)
        plt.title(f'Final number of selected variables: {len(var_sel)} (RMSE={RMSEP_scree[i_crit]:.4f})', fontsize=16)
        # Draw the RMSEP curve and mark the chosen point
        plt.plot(RMSEP_scree, label='RMSEP Scree Plot')
        plt.scatter(i_crit, RMSEP_scree[i_crit], color='r', marker='s', label='Selected Point')
        # Grid and legend
        plt.grid(True)
        plt.legend()
        # Save or show the figure
        if save_path:
            plt.savefig(save_path, bbox_inches='tight', dpi=300)
            print(f"图像已保存至: {save_path}")
        else:
            plt.show()
        return var_sel, var_sel_phase2

    def __repr__(self):
        return "SPA()"

View File

@ -0,0 +1,111 @@
import lightgbm as lgb
import numpy as np
from scipy.linalg import qr
from progress.bar import Bar
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score
class SPA_acc:
    """SPA variant scored by LightGBM classification accuracy instead of PRESS.

    Phase 1 builds candidate variable chains with QR projections; phase 2
    scores each chain by classification accuracy; a final scree over
    gain-ranked prefixes is plotted for inspection.
    """

    def _projections_qr(self, X, k, M):
        """Return the first M column indices of the projection chain seeded at column k."""
        X_projected = X.copy()
        norms = np.sum((X ** 2), axis=0)
        norm_max = np.amax(norms)
        # Boost column k so the pivoted QR selects it first
        X_projected.iloc[:, k] = X_projected.iloc[:, k] * 2 * norm_max / norms[k]
        _, __, order = qr(X_projected.to_numpy(), 0, pivoting=True)
        return order[:M].T

    def _validation(self, Xcal, ycal, var_sel, Xval=None, yval=None):
        """Train LightGBM on the selected columns and return (predictions, accuracy).

        Accuracy is computed on the validation set when provided, otherwise on
        the training set itself (an optimistic estimate).
        """
        train_data = lgb.Dataset(Xcal.iloc[:, var_sel], label=ycal)
        params = {
            'objective': 'multiclass',  # multi-class objective
            'boosting_type': 'gbdt',
            'metric': 'multi_logloss',  # multi-class cross-entropy loss
            'device': 'gpu',  # NOTE(review): requires a GPU-enabled LightGBM build — confirm
            'verbosity': -1,
            'num_class': len(np.unique(ycal))  # number of classes
        }
        # Train the LightGBM model
        model = lgb.train(params, train_data, num_boost_round=100)
        # Predict on the validation set when available, else the training set
        if Xval is not None and yval is not None:
            yhat = model.predict(Xval.iloc[:, var_sel])
            yhat = np.argmax(yhat, axis=1)  # most probable class per sample
            accuracy = accuracy_score(yval, yhat)
        else:
            yhat = model.predict(Xcal.iloc[:, var_sel])
            yhat = np.argmax(yhat, axis=1)  # most probable class per sample
            accuracy = accuracy_score(ycal, yhat)
        return yhat, accuracy

    def spa(self, Xcal, ycal, m_min=1, m_max=None, Xval=None, yval=None, autoscaling=1, save_path=None):
        """Run accuracy-driven SPA and plot the accuracy scree curve.

        Xcal/Xval must be DataFrames (indexed with .iloc internally).
        Returns (var_sel_phase2, ACCURACY_scree).
        """
        N, K = Xcal.shape
        m_max = min(N - 1, K) if m_max is None else m_max
        # Optional autoscaling by the column standard deviation
        normalization_factor = Xcal.std(ddof=1, axis=0) if autoscaling else np.ones(K)
        Xcaln = (Xcal - Xcal.mean()) / normalization_factor
        # Phase 1: one projection chain per starting column
        SEL = np.zeros((m_max, K))
        with Bar('Projections :', max=K) as bar:
            for k in range(K):
                SEL[:, k] = self._projections_qr(Xcaln, k, m_max)
                bar.next()
        # Phase 2: accuracy for every (chain length, starting column) pair
        ACCURACY = np.full((m_max + 1, K), -np.inf)
        with Bar('Evaluating subsets:', max=K * (m_max - m_min + 1)) as bar:
            for k in range(K):
                for m in range(m_min, m_max + 1):
                    var_sel = SEL[:m, k].astype(int)
                    _, accuracy = self._validation(Xcal, ycal, var_sel, Xval, yval)
                    ACCURACY[m, k] = accuracy
                    bar.next()
        # Best chain length per start, then the best start overall
        m_sel = np.argmax(ACCURACY, axis=0)
        k_sel = np.argmax(np.max(ACCURACY, axis=0))
        var_sel_phase2 = SEL[:m_sel[k_sel], k_sel].astype(int)
        # Final LightGBM Training on the winning subset, to rank by gain
        Xcal2 = Xcal.iloc[:, var_sel_phase2]
        train_data = lgb.Dataset(Xcal2, label=ycal)
        params = {
            'objective': 'multiclass',  # multi-class objective
            'boosting_type': 'gbdt',
            'metric': 'multi_logloss',  # multi-class cross-entropy loss
            'device': 'gpu',  # NOTE(review): requires a GPU-enabled LightGBM build — confirm
            'verbosity': -1,
            'num_class': len(np.unique(ycal))  # number of classes
        }
        model = lgb.train(params, train_data, num_boost_round=100)
        relev = model.feature_importance(importance_type='gain')
        index_decreasing_relev = np.argsort(-relev)
        # Accuracy scree over nested prefixes of decreasing gain
        ACCURACY_scree = np.empty(len(var_sel_phase2))
        for i in range(len(var_sel_phase2)):
            var_sel = var_sel_phase2[index_decreasing_relev[:i + 1]]
            _, accuracy = self._validation(Xcal, ycal, var_sel, Xval, yval)
            ACCURACY_scree[i] = accuracy
        # Plot the scree curve
        plt.figure()
        plt.rcParams['font.sans-serif'] = ['Times New Roman']
        plt.xlabel('Number of variables included in the model', fontsize=14)
        plt.ylabel('Accuracy', fontsize=14)
        plt.title(f'Final number of selected variables: {len(var_sel_phase2)} (Accuracy={ACCURACY_scree.max():.4f})', fontsize=16)
        plt.plot(ACCURACY_scree, label='Accuracy Scree Plot')
        plt.scatter(np.argmax(ACCURACY_scree), ACCURACY_scree.max(), color='r', marker='s', label='Selected Point')
        plt.grid(True)
        plt.legend()
        if save_path:
            plt.savefig(save_path, bbox_inches='tight', dpi=300)
            print(f"图像已保存至: {save_path}")
        else:
            plt.show()
        return var_sel_phase2, ACCURACY_scree

    def __repr__(self):
        return "SPA()"

View File

@ -0,0 +1,92 @@
"""
-*- coding: utf-8 -*-
@Time :2022/04/12 17:10
@Author : Pengyou FU
@blogs : https://blog.csdn.net/Echo_Code?spm=1000.2115.3001.5343
@github : https://github.com/FuSiry/OpenSA
@WeChat : Fu_siry
@LicenseApache-2.0 license
"""
from sklearn.cross_decomposition import PLSRegression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import ShuffleSplit, cross_val_score
from numpy.linalg import matrix_rank as rank
import numpy as np
class UVE:
    """Uninformative Variable Elimination based on PLS coefficient stability."""

    def __init__(self, x, y, ncomp=20, nrep=500, testSize=0.2):
        """
        Set up the UVE model.

        Parameters:
            x : np.ndarray, predictor matrix (input data)
            y : np.ndarray, labels (target values)
            ncomp : int, maximum number of PLS latent variables (default 20)
            nrep : int, number of resampling repetitions (default 500)
            testSize : float, test fraction of each random split (default 0.2)
        """
        self.x = x
        self.y = y
        self.ncomp = min(ncomp, rank(x))  # latent variables cannot exceed the matrix rank
        self.nrep = nrep
        self.testSize = testSize
        self.criteria = None  # stability criterion per variable
        self.featureIndex = None  # variables sorted by |criterion|, descending
        self.featureR2 = np.full(self.x.shape[1], np.nan)  # R² per nested subset size
        self.selFeature = None  # finally selected feature indices

    def calcCriteria(self):
        """Compute the stability criterion meanCoef / stdCoef for every variable."""
        coefs = np.zeros((self.nrep, self.x.shape[1]))  # PLS coefficients per repetition
        splitter = ShuffleSplit(n_splits=self.nrep, test_size=self.testSize)
        # Refit PLS on each random training subset and record the coefficients
        for step, (train, test) in enumerate(splitter.split(self.x, self.y)):
            xtrain, ytrain = self.x[train], self.y[train]
            model = PLSRegression(n_components=min(self.ncomp, rank(xtrain)))
            model.fit(xtrain, ytrain)
            coefs[step, :] = model.coef_.flatten()
        meanCoef = np.mean(coefs, axis=0)
        stdCoef = np.std(coefs, axis=0)
        # Guarded division: variables with zero std get criterion 0
        self.criteria = np.divide(meanCoef, stdCoef, out=np.zeros_like(meanCoef), where=stdCoef != 0)

    def evalCriteria(self, cv=3):
        """Cross-validate R² for nested subsets ranked by |criterion|."""
        self.featureIndex = np.argsort(-np.abs(self.criteria))
        # Grow the subset one variable at a time and score each prefix
        for i in range(self.x.shape[1]):
            subset = self.x[:, self.featureIndex[:i + 1]]
            # Plain least squares while the subset is small, PLS afterwards
            if i < self.ncomp:
                regModel = LinearRegression()
            else:
                regModel = PLSRegression(n_components=min(self.ncomp, rank(subset)))
            scores = cross_val_score(regModel, subset, self.y, cv=cv, scoring='r2')
            self.featureR2[i] = np.mean(scores)

    def cutFeature(self, *args):
        """Keep the subset with the best R²; optionally slice the given datasets too."""
        # nanargmax ignores any NaN entries left in featureR2
        best = np.nanargmax(self.featureR2)
        self.selFeature = self.featureIndex[:best + 1]
        if len(args) != 0:
            # Return each passed dataset restricted to the selected columns
            # (datasets whose width does not match x are passed through unchanged)
            return [argi[:, self.selFeature] if argi.shape[1] == self.x.shape[1] else argi
                    for argi in args]
        # Otherwise return the selected column indices
        return self.selFeature

View File

@ -0,0 +1,93 @@
import pandas as pd
import numpy as np
from classification_model.WaveSelect.Lar import Lar
from classification_model.WaveSelect.Spa import SPA
from classification_model.WaveSelect.Spa_acc import SPA_acc
from classification_model.WaveSelect.Uve import UVE
from classification_model.WaveSelect.Cars import CARS_Cloud
from classification_model.WaveSelect.Pca import Pca
from classification_model.WaveSelect.GA import GA
from classification_model.WaveSelect.ReliefF import ReliefF
from sklearn.model_selection import train_test_split
# from WaveSelect.MRMR import MRMRFeatureSelection
import os
import matplotlib.pyplot as plt
def SpctrumFeatureSelcet(method, X, y, name='', result_dir='', column_names=None):
    """
    :param method: wavelength selection / dimensionality reduction method:
        "None", "Cars", "Lars", "Uve", "Spa", "Spa_acc", "GA", "Pca", "ReliefF".
    :param X: spectral data, pandas DataFrame or numpy array (n_samples, n_features).
    :param y: labels for the spectra (n_samples,).
    :param name: base file name for result figures.
    :param result_dir: directory where result figures are saved.
    :param column_names: column names, required only when X is a numpy array.
    :return:
        - X_Feature: data after selection/reduction (n_samples, n_features).
        - y: the matching labels.
        - selected_columns: the chosen column names (indices for "Cars").
        On an unknown method, returns (None, None, None).
    """
    # Normalize the input to a DataFrame (the unnecessary `global X_Feature`
    # from the original has been removed)
    if isinstance(X, np.ndarray):
        if column_names is None:
            column_names = [f"{i}" for i in range(X.shape[1])]  # default column names
        X_df = pd.DataFrame(X, columns=column_names)
    else:
        X_df = X
    # Dispatch to the requested feature-selection method
    if method == "None":
        X_Feature = X_df
        selected_columns = X_df.columns
    elif method == "Cars":
        save_path = os.path.join(result_dir, f"{name}_cars.png")
        Featuresecletidx = CARS_Cloud(X_df.values, y, N=50, f=20, cv=10, save_fig=True, save_path=save_path)
        Featuresecletidx = Featuresecletidx.astype(int)
        X_Feature = X_df.iloc[:, Featuresecletidx]
        # NOTE(review): this branch returns positional indices while the others
        # return column names — kept as-is for backward compatibility
        selected_columns = Featuresecletidx
    elif method == "Lars":
        Featuresecletidx = Lar(X_df.values, y)
        X_Feature = X_df.iloc[:, Featuresecletidx]
        selected_columns = X_df.columns[Featuresecletidx]
    elif method == "Uve":
        uve = UVE(X_df.values, y, 20)
        uve.calcCriteria()
        uve.evalCriteria(cv=5)
        Featuresecletidx = uve.cutFeature()  # indices of the selected features
        X_Feature = X_df.iloc[:, Featuresecletidx]
        selected_columns = X_df.columns[Featuresecletidx]
    elif method == "Spa":
        save_path = os.path.join(result_dir, f"{name}_spa.png")
        Xcal, Xval, ycal, yval = train_test_split(X_df, y, test_size=0.3)
        Featuresecletidx, var_sel_phase2 = SPA().spa(
            Xcal, ycal, m_min=2, m_max=50, Xval=Xval, yval=yval, autoscaling=1, save_path=save_path)
        X_Feature = X_df.iloc[:, Featuresecletidx]
        selected_columns = X_df.columns[Featuresecletidx]
    elif method == "Spa_acc":
        save_path = os.path.join(result_dir, f"{name}_spa_acc.png")
        Xcal, Xval, ycal, yval = train_test_split(X_df, y, test_size=0.3)
        Featuresecletidx, var_sel_phase2 = SPA_acc().spa(
            Xcal, ycal, m_min=2, m_max=50, Xval=Xval, yval=yval, autoscaling=1, save_path=save_path)
        X_Feature = X_df.iloc[:, Featuresecletidx]
        selected_columns = X_df.columns[Featuresecletidx]
    elif method == "GA":
        Featuresecletidx = GA(X_df.values, y, 10)
        X_Feature = X_df.iloc[:, Featuresecletidx]
        selected_columns = X_df.columns[Featuresecletidx]
    elif method == "Pca":
        X_Feature = Pca(X_df.values)
        selected_columns = [f"PC{i+1}" for i in range(X_Feature.shape[1])]
    elif method == "ReliefF":
        relieff = ReliefF(n_neighbors=20, n_features_to_keep=20)
        Featuresecletidx = relieff.fit(X_df.values, y)
        X_Feature = X_df.iloc[:, Featuresecletidx]
        selected_columns = X_df.columns[Featuresecletidx]
    else:
        print("没有这个波长筛选方法!")
        # BUGFIX: the original returned only (None, None) here while every
        # other path returns three values, crashing any 3-way unpacking caller
        return None, None, None
    return X_Feature, y, selected_columns  # selected data, labels, column names

View File

@ -0,0 +1,123 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from WaveSelect.Lar import Lar
from WaveSelect.Spa import SPA
from WaveSelect.Uve import UVE
from WaveSelect.Cars import CARS_Cloud
from WaveSelect.GA import GA
from WaveSelect.ReliefF import ReliefF
from sklearn.model_selection import train_test_split
from collections import Counter
from tqdm import tqdm
import os
def IntegratedWaveSelect(methods, X, y, strategy="voting", column_names=None, name='', result_dir=''):
    """
    Ensemble wavelength selection: run several selection methods and combine
    their chosen feature indices.

    :param methods: list of method names ("Cars", "Lars", "Uve", "Spa", "GA", "ReliefF").
    :param X: spectral data, pandas DataFrame or numpy array (n_samples, n_features).
    :param y: labels (n_samples,).
    :param strategy: combination strategy: "voting", "weighted" or "bagging".
    :param column_names: column names, required only when X is a numpy array.
    :param name: base file name for result figures.
    :param result_dir: directory where result figures are saved.
    :return: (X_Feature, y, selected_columns).
    :raises ValueError: if strategy is not one of the supported names.
    """
    # Ensure X is a DataFrame
    if isinstance(X, np.ndarray):
        if column_names is None:
            column_names = [f"{i}" for i in range(X.shape[1])]
        X_df = pd.DataFrame(X, columns=column_names)
    else:
        X_df = X
    feature_indices_list = []
    for method in tqdm(methods, desc="Processing Feature Selection Methods"):
        print(f"Applying method: {method}")  # debug info
        if method == "Cars":
            save_path = os.path.join(result_dir, f"{name}_cars.png")
            # BUGFIX: pass the ndarray — CARS_Cloud indexes with np.ix_/slicing,
            # which fails on a raw DataFrame input
            Featuresecletidx = CARS_Cloud(X_df.values, y, N=50, f=20, cv=10, save_path=save_path)
            Featuresecletidx = Featuresecletidx.astype(int)
            feature_indices_list.append(Featuresecletidx)
        elif method == "Lars":
            Featuresecletidx = Lar(X_df.values, y)
            feature_indices_list.append(Featuresecletidx)
        elif method == "Uve":
            uve = UVE(X_df.values, y, 20)
            uve.calcCriteria()
            uve.evalCriteria(cv=5)
            Featuresecletidx = uve.cutFeature()
            feature_indices_list.append(Featuresecletidx)
        elif method == "Spa":
            save_path = os.path.join(result_dir, f"{name}_spa.png")
            Xcal, Xval, ycal, yval = train_test_split(X_df, y, test_size=0.2)
            # BUGFIX: SPA.spa needs DataFrames (it uses .iloc internally) and
            # returns (var_sel, var_sel_phase2); the original passed ndarrays
            # and appended the whole tuple, which breaks the combination step
            Featuresecletidx, _ = SPA().spa(
                Xcal=Xcal, ycal=ycal, m_min=4, m_max=32, Xval=Xval, yval=yval, autoscaling=1, save_path=save_path)
            feature_indices_list.append(Featuresecletidx)
        elif method == "GA":
            Featuresecletidx = GA(X_df.values, y, 10)
            feature_indices_list.append(Featuresecletidx)
        elif method == "ReliefF":
            relieff = ReliefF(n_neighbors=20, n_features_to_keep=20)
            Featuresecletidx = relieff.fit(X_df.values, y)
            feature_indices_list.append(Featuresecletidx)
        else:
            print(f"No such method: {method}")
            continue
        print(f"Selected indices by {method}: {Featuresecletidx}")  # debug info
    print("Feature indices list after all methods:", feature_indices_list)  # debug info
    if strategy == "voting":
        # Keep indices chosen by a strict majority of the methods
        if feature_indices_list:
            all_indices = np.concatenate(feature_indices_list)
            print("All indices concatenated:", all_indices)  # debug info
            counter = Counter(all_indices)
            print("Counter result:", counter)  # debug info
            selected_features = [
                idx for idx, count in tqdm(counter.items(), desc="Voting Selection")
                if count > len(methods) / 2
            ]
            print("Selected features after voting:", selected_features)  # debug info
        else:
            print("No features selected by any method.")  # info
            selected_features = []
    elif strategy == "weighted":
        # Equal weights per method; keep indices scoring at least the mean count
        weights = {method: 1 for method in methods}
        weighted_counts = Counter()
        for method, indices in zip(methods, feature_indices_list):
            for idx in indices:
                weighted_counts[idx] += weights[method]
        print("Weighted counts:", weighted_counts)  # debug info
        selected_features = [
            idx for idx, count in tqdm(weighted_counts.items(), desc="Weighted Selection")
            if count >= np.mean(list(weighted_counts.values()))
        ]
        print("Selected features after weighted strategy:", selected_features)  # debug info
    elif strategy == "bagging":
        # Union of selections over several random half-samples of the data
        num_iterations = 5
        selected_features = set()
        for _ in tqdm(range(num_iterations), desc="Bagging Iterations"):
            X_sample, _, y_sample, _ = train_test_split(X_df, y, test_size=0.5)
            sub_feature_indices_list = []
            for method in methods:
                if method == "Spa":
                    Xcal, Xval, ycal, yval = train_test_split(X_sample, y_sample, test_size=0.2)
                    # BUGFIX: pass DataFrames and unpack the (var_sel, phase2)
                    # tuple instead of collecting unhashable arrays
                    sub_feature_indices, _ = SPA().spa(Xcal=Xcal, ycal=ycal, m_min=4, m_max=32, Xval=Xval,
                                                       yval=yval, autoscaling=1)
                elif method == "Cars":
                    sub_feature_indices = CARS_Cloud(X_sample.values, y_sample)
                else:
                    continue
                sub_feature_indices_list.append(sub_feature_indices)
            for sub_indices in sub_feature_indices_list:
                selected_features.update(sub_indices)
        selected_features = list(selected_features)
        print("Selected features after bagging strategy:", selected_features)  # debug info
    else:
        raise ValueError("Invalid strategy. Choose from 'voting', 'weighted', or 'bagging'.")
    selected_features = list(map(int, selected_features))  # ensure plain int indices
    X_Feature = X_df.iloc[:, selected_features]
    selected_columns = X_df.columns[selected_features]
    # Close figures opened by the selection methods to free resources
    plt.close()
    return X_Feature, y, selected_columns