Add modules; add main invocation command

2026-01-07 16:36:47 +08:00
commit 2d4b170a45
109 changed files with 55763 additions and 0 deletions


@ -0,0 +1,176 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import copy
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
def PC_Cross_Validation(X, y, pc, cv):
'''
X : spectral matrix (DataFrame), n x m
y : concentration vector (Series) of reference chemical values
pc: maximum number of latent components to test
cv: number of cross-validation folds
return :
RMSECV: RMSECV for each number of components
rindex: optimal number of components (index of the minimum RMSECV)
'''
kf = KFold(n_splits=cv)
RMSECV = []
for i in range(pc):
RMSE = []
for train_index, test_index in kf.split(X):
x_train, x_test = X.iloc[train_index], X.iloc[test_index]
y_train, y_test = y.iloc[train_index], y.iloc[test_index]
pls = PLSRegression(n_components=i + 1)
pls.fit(x_train, y_train)
y_predict = pls.predict(x_test)
RMSE.append(np.sqrt(mean_squared_error(y_test, y_predict)))
RMSE_mean = np.mean(RMSE)
RMSECV.append(RMSE_mean)
rindex = np.argmin(RMSECV)
return RMSECV, rindex
def Cross_Validation(X, y, pc, cv):
'''
X : spectral matrix (DataFrame), n x m
y : concentration vector (Series) of reference chemical values
pc: number of latent components to use
cv: number of cross-validation folds
return :
RMSECV: mean RMSECV over the folds
'''
kf = KFold(n_splits=cv)
RMSE = []
for train_index, test_index in kf.split(X):
x_train, x_test = X.iloc[train_index], X.iloc[test_index]
y_train, y_test = y.iloc[train_index], y.iloc[test_index]
pls = PLSRegression(n_components=pc)
pls.fit(x_train, y_train)
y_predict = pls.predict(x_test)
RMSE.append(np.sqrt(mean_squared_error(y_test, y_predict)))
RMSE_mean = np.mean(RMSE)
return RMSE_mean
def CARS_Cloud(X, y, N=50, f=20, cv=10, save_fig=False, save_path=None):
'''
X : spectral matrix (DataFrame or ndarray)
y : concentration vector (Series or ndarray)
N : number of Monte Carlo iterations
f : maximum number of PLS latent components
cv : number of cross-validation folds
save_fig : whether to save the figure
save_path : path for the saved figure
return :
OptWave : indices of the selected wavelengths
'''
p = 0.8
m, n = X.shape
u = np.power((n / 2), (1 / (N - 1)))
k = (1 / (N - 1)) * np.log(n / 2)
cal_num = np.round(m * p)
b2 = np.arange(n)
x = np.asarray(X)  # ensure a numpy array (converts a DataFrame)
y = np.asarray(y)  # ensure a numpy array (converts a Series)
D = np.vstack((np.array(b2).reshape(1, -1), x))
WaveData = []
WaveNum = []
RMSECV = []
r = []
for i in range(1, N + 1):
r.append(u * np.exp(-1 * k * i))
wave_num = int(np.round(r[i - 1] * n))
WaveNum = np.hstack((WaveNum, wave_num))
cal_index = np.random.choice(np.arange(m), size=int(cal_num), replace=False)
wave_index = b2[:wave_num].reshape(1, -1)[0]
# np.ix_ indexes the calibration rows and retained wavelength columns jointly
xcal = x[np.ix_(cal_index, wave_index)]
ycal = y[cal_index]
ycal = ycal.ravel()  # flatten the targets to 1-D
x = x[:, wave_index]  # keep only the retained wavelengths in x
D = D[:, wave_index]  # and in the bookkeeping matrix D
d = D[0, :].reshape(1, -1)
wnum = n - wave_num
if wnum > 0:
d = np.hstack((d, np.full((1, wnum), -1)))
if len(WaveData) == 0:
WaveData = d
else:
WaveData = np.vstack((WaveData, d.reshape(1, -1)))
if wave_num < f:
f = wave_num
pls = PLSRegression(n_components=f)
pls.fit(xcal, ycal)
beta = pls.coef_
# sklearn >= 1.1 stores coef_ as (n_targets, n_features); older releases
# used (n_features, n_targets). Handle both layouts.
if beta.shape[0] == 1:
b = np.abs(beta[0])  # new layout: the single target row
else:
b = np.abs(beta[:, 0])  # old layout: the single target column
# rank the retained wavelengths by |coefficient|; this ordering drives
# the wavelength subset of the next iteration
b2 = np.argsort(-b)
rmsecv, rindex = PC_Cross_Validation(pd.DataFrame(xcal), pd.Series(ycal), f, cv)
RMSECV.append(Cross_Validation(pd.DataFrame(xcal), pd.Series(ycal), rindex + 1, cv))
WAVE = []
for i in range(WaveData.shape[0]):
wd = WaveData[i, :]
WD = np.ones((len(wd)))
for j in range(len(wd)):
ind = np.where(wd == j)
if len(ind[0]) == 0:
WD[j] = 0
else:
WD[j] = wd[ind[0]]
if len(WAVE) == 0:
WAVE = copy.deepcopy(WD)
else:
WAVE = np.vstack((WAVE, WD.reshape(1, -1)))
MinIndex = np.argmin(RMSECV)
Optimal = WAVE[MinIndex, :]
boindex = np.where(Optimal != 0)
OptWave = boindex[0]
plt.figure(figsize=(12, 10))
# use Times New Roman for the plot text
plt.rcParams['font.sans-serif'] = ['Times New Roman']
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
fonts = 20
plt.subplot(211)
plt.xlabel('Monte Carlo Iterations', fontsize=fonts)
plt.ylabel('Number of Selected Wavelengths', fontsize=fonts)
plt.title('Optimal Iteration: ' + str(MinIndex), fontsize=fonts)
plt.plot(np.arange(N), WaveNum)
plt.subplot(212)
plt.xlabel('Monte Carlo Iterations', fontsize=fonts)
plt.ylabel('RMSECV', fontsize=fonts)
plt.plot(np.arange(N), RMSECV)
# optionally save the figure
if save_fig:
plt.savefig(save_path)  # write the figure to disk
print(f"The figure has been saved as {save_path}")
# plt.show()
return OptWave
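A minimal smoke test for CARS_Cloud (a hedged sketch: the synthetic data and parameter values are illustrative assumptions; the import path follows the Feature_Selection_method package layout used by feture_select.py below):

# Hypothetical usage sketch for CARS_Cloud on random data.
import numpy as np
from Feature_Selection_method.Cars import CARS_Cloud  # assumed package layout

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(60, 200))                  # 60 samples, 200 bands
y_demo = X_demo[:, 10] + 0.1 * rng.normal(size=60)   # target tied to band 10
picked = CARS_Cloud(X_demo, y_demo, N=20, f=5, cv=5) # small run for speed
print("selected wavelength indices:", picked)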


@ -0,0 +1,59 @@
from deap import base, creator, tools, algorithms
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
def GA(X, y, n_generations=20, population_size=50, crossover_prob=0.7, mutation_prob=0.2):
"""
使用遗传算法进行特征选择,返回选择的特征索引。
参数:
X (ndarray): 特征矩阵
y (ndarray): 标签
n_generations (int): 迭代次数
population_size (int): 种群大小
crossover_prob (float): 交叉概率
mutation_prob (float): 变异概率
返回:
list: 选择的特征索引
"""
# define the fitness (maximised) and the individual container
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
toolbox.register("attr_bool", lambda: np.random.randint(0, 2))
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=X.shape[1])
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# fitness: mean CV accuracy of a random forest on the selected columns
def evaluate(individual):
selected_features = [index for index, val in enumerate(individual) if val == 1]
if not selected_features:
return 0,  # no features selected -> zero fitness
X_selected = X[:, selected_features]
clf = RandomForestClassifier(random_state=42)
score = cross_val_score(clf, X_selected, y, cv=5).mean()  # 5-fold cross-validation
return score,
toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)
# initialise the population
population = toolbox.population(n=population_size)
# run the genetic algorithm
result_population, _ = algorithms.eaSimple(population, toolbox, cxpb=crossover_prob,
mutpb=mutation_prob, ngen=n_generations,
verbose=False)
# pick the best individual from the final population
best_individual = tools.selBest(result_population, k=1)[0]
selected_features = [index for index, val in enumerate(best_individual) if val == 1]
return selected_features
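A hedged usage sketch (the synthetic data and reduced parameter values are illustrative assumptions):

# Hypothetical usage sketch for GA-based feature selection.
from sklearn.datasets import make_classification
from Feature_Selection_method.GA import GA  # assumed package layout

X_demo, y_demo = make_classification(n_samples=120, n_features=30,
                                     n_informative=5, random_state=0)
idx = GA(X_demo, y_demo, n_generations=5, population_size=20)  # small run
print("selected feature indices:", idx)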


@ -0,0 +1,31 @@
from sklearn import linear_model
import numpy as np
def Lar(X, y, nums=40):
"""
使用 LARSLeast Angle Regression选择重要的特征波长。
参数:
X : np.ndarray预测变量矩阵输入数据
y : np.ndarray标签目标值
nums : int选择的特征点数量默认为 40
返回:
np.ndarray选择的特征波长索引
"""
# initialise the LARS model
Lars = linear_model.Lars()
# fit the model
Lars.fit(X, y)
# absolute regression coefficients act as importance scores
corflist = np.abs(Lars.coef_)
# keep the nums most important features (largest coefficients)
SpectrumList = np.argsort(corflist)[-nums:][::-1]
# sort the selected indices so the output order is stable
SpectrumList = np.sort(SpectrumList)
return SpectrumList
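A hedged usage sketch (data and parameters are illustrative assumptions):

# Hypothetical usage sketch for Lar.
import numpy as np
from Feature_Selection_method.Lar import Lar  # assumed package layout

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(50, 120))
y_demo = X_demo[:, 3] - 2.0 * X_demo[:, 40] + 0.1 * rng.normal(size=50)
bands = Lar(X_demo, y_demo, nums=10)
print("selected band indices:", bands)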


@ -0,0 +1,88 @@
import numpy as np
from sklearn.neighbors import NearestNeighbors
class ReliefF:
def __init__(self, n_neighbors=20, n_features_to_keep=20):
"""
初始化 ReliefF 算法参数。
:param n_neighbors: 每个样本的近邻数量。
:param n_features_to_keep: 每次保留的特征数量。
"""
self.n_neighbors = n_neighbors
self.n_features_to_keep = n_features_to_keep
self.feature_scores = None # 用于存储每个特征的评分
self.top_features = None # 用于存储评分最高的特征索引
def fit(self, X, y):
"""
根据给定的数据 X 和标签 y 计算特征评分。
:param X: 输入特征矩阵。
:param y: 类别标签。
:return: 返回选择的特征索引。
"""
m, n = X.shape # m 是样本数n 是特征数
self.feature_scores = np.zeros(n) # 初始化特征评分为 0
# 寻找每个样本的 n_neighbors 个近邻
nbrs = NearestNeighbors(n_neighbors=self.n_neighbors + 1).fit(X)
distances, indices = nbrs.kneighbors(X)
# visit every sample and update the feature scores
for i in range(m):
y_i = y[i]  # class label of the current sample
# split the neighbours into same-class (hits) and other-class (misses)
hit_neighbors = []
miss_neighbors = []
for j in indices[i][1:]:  # indices[i][0] is the sample itself; skip it
if y[j] == y_i:
hit_neighbors.append(X[j])
else:
miss_neighbors.append(X[j])
# update the score of every feature
for f in range(n):
for hit in hit_neighbors:
self.feature_scores[f] -= (X[i, f] - hit[f]) ** 2 / (self.n_neighbors * m)
for miss in miss_neighbors:
self.feature_scores[f] += (X[i, f] - miss[f]) ** 2 / (self.n_neighbors * m)
# keep the indices of the n_features_to_keep highest-scoring features
self.top_features = np.argsort(self.feature_scores)[-self.n_features_to_keep:]
return self.top_features  # indices of the selected features
def fit_transform(self, X, y):
"""Fit and transform in one step; returns the selected feature indices."""
return self.fit(X, y)
def multi_scale_relieff_stratified(X, y, segment_size=100, n_subsegments=20, n_features_per_subsegment=5):
"""
分层多尺度特征选择,确保每个波长段都能被覆盖。
:param X: 输入特征矩阵。
:param y: 类别标签。
:param segment_size: 每个波长段的大小。
:param n_subsegments: 每个段内的子区域数量。
:param n_features_per_subsegment: 每个子区域选择的特征数量。
:return: 分层选择的特征索引。
"""
selected_features = []
# iterate over the wavelength segments
for i in range(0, X.shape[1], segment_size):
segment_X = X[:, i:i + segment_size]
subsegment_size = segment_size // n_subsegments  # width of a sub-region
# run ReliefF inside every sub-region
for j in range(0, segment_size, subsegment_size):
subsegment_X = segment_X[:, j:j + subsegment_size]
relief = ReliefF(n_neighbors=10, n_features_to_keep=n_features_per_subsegment)
subsegment_selected = relief.fit_transform(subsegment_X, y)
# map the local indices back to global indices
selected_features.extend(subsegment_selected + i + j)
# return the de-duplicated feature indices
return np.unique(selected_features)
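A hedged usage sketch (synthetic data; the segment sizes below are illustrative assumptions chosen so the sub-regions keep only part of their features):

# Hypothetical usage sketch for stratified multi-scale ReliefF.
from sklearn.datasets import make_classification
from Feature_Selection_method.ReliefF import multi_scale_relieff_stratified  # assumed layout

X_demo, y_demo = make_classification(n_samples=120, n_features=200,
                                     n_informative=10, random_state=0)
sel = multi_scale_relieff_stratified(X_demo, y_demo, segment_size=100,
                                     n_subsegments=10, n_features_per_subsegment=2)
print("selected feature count:", len(sel))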


@ -0,0 +1,116 @@
import numpy as np
import scipy.stats
from scipy.linalg import qr, inv, pinv
from progress.bar import Bar
from matplotlib import pyplot as plt
class SPA:
def _projections_qr(self, X, k, M):
X_projected = X.copy()
norms = np.sum((X ** 2), axis=0)
norm_max = np.amax(norms)
X_projected.iloc[:, k] = X_projected.iloc[:, k] * 2 * norm_max / norms[k]
_, __, order = qr(X_projected.to_numpy(), 0, pivoting=True)
return order[:M].T
def _validation(self, Xcal, ycal, var_sel, Xval=None, yval=None):
N = Xcal.shape[0]
NV = Xval.shape[0] if Xval is not None else 0
yhat, e = None, None
if NV > 0:
Xcal_ones = np.hstack([np.ones((N, 1)), Xcal.iloc[:, var_sel].to_numpy()])
b = np.linalg.lstsq(Xcal_ones, ycal, rcond=None)[0]
Xval_ones = np.hstack([np.ones((NV, 1)), Xval.iloc[:, var_sel].to_numpy()])
yhat = Xval_ones.dot(b)
e = yval - yhat
else:
yhat = np.zeros((N, 1))
for i in range(N):
cal = np.hstack([np.arange(i), np.arange(i + 1, N)])
X = Xcal.iloc[cal, var_sel]
y = ycal.iloc[cal]
X_ones = np.hstack([np.ones((N - 1, 1)), X.to_numpy()])
b = np.linalg.lstsq(X_ones, y, rcond=None)[0]
xtest = Xcal.iloc[i, var_sel].to_numpy()
yhat[i] = np.hstack([1, xtest]).dot(b)
e = ycal.to_numpy() - yhat
return yhat, e
def spa(self, Xcal, ycal, m_min=1, m_max=None, Xval=None, yval=None, autoscaling=1, save_path=None):
N, K = Xcal.shape
m_max = min(N - 1, K) if m_max is None else m_max
normalization_factor = Xcal.std(ddof=1, axis=0) if autoscaling else np.ones(K)
Xcaln = (Xcal - Xcal.mean()) / normalization_factor
SEL = np.zeros((m_max, K))
with Bar('Projections :', max=K) as bar:
for k in range(K):
SEL[:, k] = self._projections_qr(Xcaln, k, m_max)
bar.next()
PRESS = np.full((m_max + 1, K), np.inf)
with Bar('Evaluating subsets:', max=K * (m_max - m_min + 1)) as bar:
for k in range(K):
for m in range(m_min, m_max + 1):
var_sel = SEL[:m, k].astype(int)
_, e = self._validation(Xcal, ycal, var_sel, Xval, yval)
PRESS[m, k] = e.T @ e
bar.next()
m_sel = np.argmin(PRESS, axis=0)
k_sel = np.argmin(np.min(PRESS, axis=0))
var_sel_phase2 = SEL[:m_sel[k_sel], k_sel].astype(int)
Xcal2 = np.hstack([np.ones((N, 1)), Xcal.iloc[:, var_sel_phase2].to_numpy()])
b = np.linalg.lstsq(Xcal2, ycal, rcond=None)[0]
std_deviation = Xcal2.std(ddof=1, axis=0)
relev = np.abs(b * std_deviation)[1:]
index_decreasing_relev = np.argsort(-relev)
PRESS_scree = np.empty(len(var_sel_phase2))
for i in range(len(var_sel_phase2)):
var_sel = var_sel_phase2[index_decreasing_relev[:i + 1]]
_, e = self._validation(Xcal, ycal, var_sel, Xval, yval)
PRESS_scree[i] = np.conj(e).T @ e
RMSEP_scree = np.sqrt(PRESS_scree / len(e))
alpha = 0.25
dof = len(e)
fcrit = scipy.stats.f.ppf(1 - alpha, dof, dof)
PRESS_crit = np.min(PRESS_scree) * fcrit
i_crit = np.min(np.nonzero(PRESS_scree < PRESS_crit))
i_crit = max(m_min, i_crit)
var_sel = var_sel_phase2[index_decreasing_relev[:i_crit]]
# plotting
plt.figure()
# use Times New Roman for the plot text
plt.rcParams['font.sans-serif'] = ['Times New Roman']
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
# title, axis labels and grid
plt.xlabel('Number of variables included in the model', fontsize=14)
plt.ylabel('RMSE', fontsize=14)
plt.title(f'Final number of selected variables: {len(var_sel)} (RMSE={RMSEP_scree[i_crit]:.4f})', fontsize=16)
# RMSEP curve
plt.plot(RMSEP_scree, label='RMSEP Scree Plot')
plt.scatter(i_crit, RMSEP_scree[i_crit], color='r', marker='s', label='Selected Point')
# grid and legend
plt.grid(True)
plt.legend()
# save or show the figure
if save_path:
plt.savefig(save_path, bbox_inches='tight', dpi=300)
print(f"Figure saved to: {save_path}")
else:
plt.show()
return var_sel, var_sel_phase2
def __repr__(self):
return "SPA()"


@ -0,0 +1,82 @@
from sklearn.cross_decomposition import PLSRegression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import ShuffleSplit, cross_val_score
from numpy.linalg import matrix_rank as rank
import numpy as np
class UVE:
def __init__(self, x, y, ncomp=20, nrep=500, testSize=0.2):
"""
初始化 UVE 模型。
参数:
x : np.ndarray预测变量矩阵输入数据
y : np.ndarray标签目标值
ncomp : intPLS 中的最大潜变量数量,默认为 20
nrep : int重复次数默认为 500
testSize : float训练集中划分的测试集比例默认为 0.2
"""
self.x = x
self.y = y
self.ncomp = min(ncomp, rank(x))  # cap the latent variables at the matrix rank
self.nrep = nrep
self.testSize = testSize
self.criteria = None  # standardised coefficients
self.featureIndex = None  # feature ranking indices
self.featureR2 = np.full(self.x.shape[1], np.nan)  # R^2 values
self.selFeature = None  # finally selected feature indices
def calcCriteria(self):
"""计算每个变量的标准化系数 (meanCoef / stdCoef)。"""
PLSCoef = np.zeros((self.nrep, self.x.shape[1])) # 存储每次迭代的 PLS 系数
ss = ShuffleSplit(n_splits=self.nrep, test_size=self.testSize)
# 遍历每次划分的数据集,计算 PLS 系数
for step, (train, test) in enumerate(ss.split(self.x, self.y)):
xtrain, ytrain = self.x[train], self.y[train]
plsModel = PLSRegression(n_components=min(self.ncomp, rank(xtrain)))
plsModel.fit(xtrain, ytrain)
PLSCoef[step, :] = plsModel.coef_.flatten()
# np.divide avoids division-by-zero when a coefficient has no spread
meanCoef = np.mean(PLSCoef, axis=0)
stdCoef = np.std(PLSCoef, axis=0)
self.criteria = np.divide(meanCoef, stdCoef, out=np.zeros_like(meanCoef), where=stdCoef != 0)
def evalCriteria(self, cv=3):
"""Evaluate R^2 for nested variable subsets ranked by the criterion."""
# rank the features by the absolute value of the criterion, descending
self.featureIndex = np.argsort(-np.abs(self.criteria))
# grow the subset one feature at a time and score each subset
for i in range(self.x.shape[1]):
xi = self.x[:, self.featureIndex[:i + 1]]  # the first i+1 features
# choose the regression model by subset size
if i < self.ncomp:
regModel = LinearRegression()
else:
regModel = PLSRegression(n_components=min(self.ncomp, rank(xi)))
# cross-validate and store the R^2
cvScore = cross_val_score(regModel, xi, self.y, cv=cv, scoring='r2')
self.featureR2[i] = np.mean(cvScore)
def cutFeature(self, *args):
"""根据 R² 最大值选择特征,并返回所选特征的索引(列号)。"""
# 找到 R² 最大值对应的索引位置
cuti = np.nanargmax(self.featureR2) # 使用 nanargmax 以避免 NaN 的影响
self.selFeature = self.featureIndex[:cuti + 1] # 最优特征索引
# 如果传入其他数据集,返回筛选后的数据
if len(args) != 0:
returnx = list(args)
for i, argi in enumerate(args):
if argi.shape[1] == self.x.shape[1]:
returnx[i] = argi[:, self.selFeature]
return returnx
# return the indices (column numbers) of the selected features
return self.selFeature
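A hedged usage sketch (small synthetic problem; nrep is reduced from the default for speed):

# Hypothetical usage sketch for UVE.
import numpy as np
from Feature_Selection_method.Uve import UVE  # assumed package layout

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(60, 25))
y_demo = X_demo[:, 2] + 0.1 * rng.normal(size=60)
uve = UVE(X_demo, y_demo, ncomp=5, nrep=50)
uve.calcCriteria()
uve.evalCriteria(cv=3)
sel = uve.cutFeature()
print("selected columns:", sel)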


@ -0,0 +1,728 @@
"""
批量特征选择工具
支持对多个CSV文件或数据集进行批量特征选择
"""
import os
import pandas as pd
import numpy as np
from pathlib import Path
from typing import List, Dict, Optional, Tuple, Union
import argparse
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
import warnings
# feature-selection module
from feture_select import (
FeatureSelectionConfig,
select_features_from_csv,
select_features_from_data
)
warnings.filterwarnings('ignore')
def parse_column_range(column_range: Union[str, int, List[Union[str, int]]], total_columns: int) -> List[int]:
"""
解析列范围字符串,返回列索引列表
Args:
column_range: 列范围,如 "0:5", "2,4,6-8", [0,1,2] 或单个索引
total_columns: 总列数
Returns:
列索引列表
"""
if isinstance(column_range, (int, np.integer)):
# a single column index
if column_range >= total_columns or column_range < 0:
raise ValueError(f"Column index {column_range} out of range [0, {total_columns-1}]")
return [column_range]
elif isinstance(column_range, str):
# parse a range string
columns = []
# multiple ranges are separated by commas
for part in column_range.split(','):
part = part.strip()
if ':' in part:
# colon range such as "0:5" (half-open, like Python slicing)
start, end = part.split(':')
start = int(start.strip()) if start.strip() else 0
end = int(end.strip()) if end.strip() else total_columns
if start < 0:
start = total_columns + start
if end < 0:
end = total_columns + end
if start >= total_columns or end > total_columns:
raise ValueError(f"Range {start}:{end} out of column range [0, {total_columns-1}]")
columns.extend(range(start, end))
else:
# a single index
idx = int(part.strip())
if idx < 0:
idx = total_columns + idx
if idx >= total_columns or idx < 0:
raise ValueError(f"Column index {idx} out of range [0, {total_columns-1}]")
columns.append(idx)
return list(set(columns))  # de-duplicate
elif isinstance(column_range, (list, tuple)):
# an explicit list of column indices
columns = []
for idx in column_range:
if isinstance(idx, str):
if ':' in idx:
# a range string inside the list
start, end = idx.split(':')
start = int(start.strip()) if start.strip() else 0
end = int(end.strip()) if end.strip() else total_columns
if start < 0:
start = total_columns + start
if end < 0:
end = total_columns + end
if start >= total_columns or end > total_columns:
raise ValueError(f"Range {start}:{end} out of column range [0, {total_columns-1}]")
columns.extend(range(start, end))
else:
idx_int = int(idx.strip())
if idx_int < 0:
idx_int = total_columns + idx_int
if idx_int >= total_columns or idx_int < 0:
raise ValueError(f"Column index {idx_int} out of range [0, {total_columns-1}]")
columns.append(idx_int)
else:
if idx < 0:
idx = total_columns + idx
if idx >= total_columns or idx < 0:
raise ValueError(f"Column index {idx} out of range [0, {total_columns-1}]")
columns.append(idx)
return list(set(columns))  # de-duplicate
else:
raise ValueError(f"Unsupported column range format: {type(column_range)}")
def convert_column_indices_to_names(df: pd.DataFrame, column_indices: List[int]) -> List[str]:
"""
将列索引转换为列名
Args:
df: DataFrame
column_indices: 列索引列表
Returns:
列名列表
"""
return [df.columns[i] for i in column_indices]
def resolve_spectral_columns(df: pd.DataFrame, spectral_columns: Union[str, List[Union[str, int]], None]) -> List[str]:
"""
解析光谱列配置,支持列名和列号范围
Args:
df: DataFrame
spectral_columns: 光谱列配置
Returns:
光谱列名列表
"""
if spectral_columns is None:
# 默认使用除标签列外的所有列
return df.columns.tolist()
elif isinstance(spectral_columns, str) and spectral_columns == "auto":
# auto-detect spectral columns (normally the numeric ones)
potential_spectral_cols = []
for col in df.columns:
if pd.api.types.is_numeric_dtype(df[col]):
# check that the column is essentially numeric (a spectral band)
try:
values = pd.to_numeric(df[col], errors='coerce')
if values.notna().sum() > len(df) * 0.8:  # at least 80% numeric
potential_spectral_cols.append(col)
except:
continue
return potential_spectral_cols
else:
# parse the index range
try:
column_indices = parse_column_range(spectral_columns, len(df.columns))
return convert_column_indices_to_names(df, column_indices)
except ValueError as e:
print(f"Failed to parse spectral columns: {e}")
print("Falling back to auto-detection")
return resolve_spectral_columns(df, "auto")
def find_csv_files(directory: Union[str, Path], pattern: str = "*.csv") -> List[Path]:
"""
在目录中查找所有CSV文件
Args:
directory: 搜索目录
pattern: 文件匹配模式
Returns:
CSV文件路径列表
"""
directory = Path(directory)
if not directory.exists():
raise FileNotFoundError(f"目录不存在: {directory}")
csv_files = list(directory.glob(pattern))
csv_files.sort() # 排序以保证顺序一致性
print(f"在目录 {directory} 中找到 {len(csv_files)} 个CSV文件")
return csv_files
def create_batch_configs(csv_files: List[Path],
base_config: FeatureSelectionConfig,
output_base_dir: Union[str, Path]) -> List[Tuple[Path, FeatureSelectionConfig]]:
"""
为每个CSV文件创建配置
Args:
csv_files: CSV文件列表
base_config: 基础配置
output_base_dir: 输出基础目录
Returns:
(文件路径, 配置) 元组列表
"""
configs = []
output_base_dir = Path(output_base_dir)
for csv_file in csv_files:
try:
# read the head of the CSV to discover its columns
df = pd.read_csv(csv_file, nrows=5)  # the first 5 rows are enough for column info
# resolve the label column
if isinstance(base_config.label_column, str):
if base_config.label_column not in df.columns:
print(f"Warning: label column '{base_config.label_column}' not found in {csv_file.name}; trying the first column")
resolved_label_column = df.columns[0]
else:
resolved_label_column = base_config.label_column
else:
# the label column was given as an index
try:
resolved_label_column = df.columns[base_config.label_column]
except IndexError:
print(f"Warning: column index {base_config.label_column} out of range in {csv_file.name}; using the first column")
resolved_label_column = df.columns[0]
# resolve the spectral columns
resolved_spectral_columns = resolve_spectral_columns(df, base_config.spectral_columns)
# make sure the label column is not among the spectral columns
if resolved_label_column in resolved_spectral_columns:
resolved_spectral_columns.remove(resolved_label_column)
if len(resolved_spectral_columns) == 0:
print(f"Warning: no usable spectral columns found in {csv_file.name}")
continue
print(f"File {csv_file.name}: label column='{resolved_label_column}', spectral columns={len(resolved_spectral_columns)}")
except Exception as e:
print(f"Error reading {csv_file.name}: {e}; skipping this file")
continue
# give every file its own output directory
file_stem = csv_file.stem
file_output_dir = output_base_dir / file_stem
file_output_dir.mkdir(parents=True, exist_ok=True)
# copy the base config and override the per-file fields
config = FeatureSelectionConfig(
method=base_config.method,
method_params=base_config.method_params.copy(),
csv_file_path=str(csv_file),
label_column=resolved_label_column,
spectral_columns=resolved_spectral_columns,
output_csv=base_config.output_csv,
output_dir=str(file_output_dir),
output_filename=f"{file_stem}_selected_features",
save_plots=base_config.save_plots,
plot_name_prefix=f"{file_stem}_{base_config.method}",
plot_dir=str(file_output_dir) if base_config.plot_dir else None
)
configs.append((csv_file, config))
return configs
def process_single_file(csv_file: Path, config: FeatureSelectionConfig) -> Dict:
"""
Run feature selection for a single CSV file.
Args:
csv_file: CSV file path
config: feature-selection configuration
Returns:
Result dictionary
"""
result = {
'file': str(csv_file),
'file_name': csv_file.name,
'success': False,
'error': None,
'n_selected_features': 0,
'selected_columns': [],
'processing_time': 0,
'output_dir': config.output_dir
}
start_time = time.time()
try:
print(f"Processing file: {csv_file.name}")
# run the feature selection
X_selected, y, selected_columns = select_features_from_csv(config)
# record the outcome
result['success'] = True
result['n_selected_features'] = X_selected.shape[1]
result['selected_columns'] = selected_columns.tolist() if hasattr(selected_columns, 'tolist') else list(selected_columns)
result['n_samples'] = X_selected.shape[0]
print(f"Finished {csv_file.name}; selected features: {result['n_selected_features']}")
except Exception as e:
result['error'] = str(e)
print(f"Failed on {csv_file.name}: {e}")
finally:
result['processing_time'] = time.time() - start_time
return result
def batch_feature_selection(csv_files: List[Path],
base_config: FeatureSelectionConfig,
output_base_dir: Union[str, Path],
max_workers: Optional[int] = None,
parallel: bool = False) -> List[Dict]:
"""
Run feature selection over a batch of files.
Args:
csv_files: list of CSV files
base_config: base configuration
output_base_dir: base output directory
max_workers: maximum number of parallel workers
parallel: whether to process in parallel
Returns:
List of result dictionaries
"""
# build the per-file configurations
file_configs = create_batch_configs(csv_files, base_config, output_base_dir)
results = []
if parallel and len(file_configs) > 1:
# parallel processing
print(f"Processing {len(file_configs)} files in parallel (max workers: {max_workers or 'auto'})")
with ProcessPoolExecutor(max_workers=max_workers) as executor:
# submit all tasks
future_to_config = {
executor.submit(process_single_file, csv_file, config): (csv_file, config)
for csv_file, config in file_configs
}
# collect the results
for future in as_completed(future_to_config):
csv_file, config = future_to_config[future]
try:
result = future.result()
results.append(result)
except Exception as e:
print(f"并行处理失败 {csv_file.name}: {e}")
results.append({
'file': str(csv_file),
'file_name': csv_file.name,
'success': False,
'error': str(e),
'processing_time': 0
})
else:
# serial processing
print(f"Processing {len(file_configs)} files serially")
for csv_file, config in file_configs:
result = process_single_file(csv_file, config)
results.append(result)
return results
def save_batch_results(results: List[Dict], output_file: Union[str, Path]):
"""
Save the batch results to a file.
Args:
results: list of result dictionaries
output_file: output file path
"""
output_file = Path(output_file)
output_file.parent.mkdir(parents=True, exist_ok=True)
# convert to a DataFrame
results_df = pd.DataFrame(results)
# write as CSV
results_df.to_csv(output_file, index=False, encoding='utf-8')
print(f"Batch results saved to: {output_file}")
def print_batch_summary(results: List[Dict]):
"""
Print a summary of the batch run.
Args:
results: list of result dictionaries
"""
total_files = len(results)
successful_files = sum(1 for r in results if r['success'])
failed_files = total_files - successful_files
total_time = sum(r['processing_time'] for r in results)
avg_time = total_time / total_files if total_files > 0 else 0
print("\n" + "="*60)
print("Batch feature-selection summary")
print("="*60)
print(f"Total files: {total_files}")
print(f"Succeeded: {successful_files}")
print(f"Failed: {failed_files}")
print(f"Total processing time: {total_time:.2f} s")
print(f"Average time per file: {avg_time:.2f} s")
if successful_files > 0:
selected_features = [r['n_selected_features'] for r in results if r['success']]
print(f"Mean selected features: {np.mean(selected_features):.1f} ± {np.std(selected_features):.1f}")
if failed_files > 0:
print("\nFailed files:")
for result in results:
if not result['success']:
print(f" - {result['file_name']}: {result['error']}")
print("="*60)
def create_example_batch_config() -> FeatureSelectionConfig:
"""
Create an example batch configuration.
Returns:
Example configuration object
"""
return FeatureSelectionConfig(
method="Cars",  # one of: Cars, Lars, Uve, Spa, GA, ReliefF, RandomFrog, SiPLS
method_params={
'N': 50,  # CARS parameters
'f': 20,
'cv': 10
},
# Note: csv_file_path, label_column and spectral_columns are set per file
output_csv=True,
save_plots=True,
plot_name_prefix="batch_fs"
)
def main():
"""主函数"""
parser = argparse.ArgumentParser(description='批量特征选择工具')
# 必需参数
parser.add_argument('input_dir', help='包含CSV文件的输入目录')
parser.add_argument('output_dir', help='输出目录')
# 可选参数
parser.add_argument('--method', default='CARS',
choices=['Cars', 'Lars', 'Uve', 'Spa', 'GA', 'ReliefF', 'RandomFrog', 'SiPLS'],
help='特征选择方法 (默认: CARS)')
parser.add_argument('--label_column', required=True,
help='标签列名或列索引 (例如: "concentration" 或 0)')
parser.add_argument('--spectral_columns', required=True,
help='光谱列配置,支持: 列名列表 "col1 col2 col3", 列号范围 "1:10", 混合 "2,4,6-8", 或 "auto" 自动检测')
parser.add_argument('--parallel', action='store_true', help='启用并行处理')
parser.add_argument('--max_workers', type=int, help='最大并行工作数')
parser.add_argument('--no_csv_output', action='store_true', help='不输出CSV文件')
parser.add_argument('--no_plots', action='store_true', help='不生成可视化图')
parser.add_argument('--results_file', default='batch_results.csv', help='结果文件路径')
args = parser.parse_args()
try:
# parse the spectral-column argument
if args.spectral_columns == "auto":
spectral_columns = "auto"
elif ':' in str(args.spectral_columns) or ',' in str(args.spectral_columns):
# range syntax present; keep the string for later parsing
spectral_columns = args.spectral_columns
else:
# otherwise treat it as a space-separated list of column names
spectral_columns = args.spectral_columns.split()
# coerce the label column to the right type
try:
# numeric -> integer index
label_column = int(args.label_column)
except ValueError:
# otherwise treat it as a column name
label_column = args.label_column
# build the base configuration
base_config = FeatureSelectionConfig(
method=args.method,
method_params={},  # use the per-method defaults
label_column=label_column,
spectral_columns=spectral_columns,
output_csv=not args.no_csv_output,
save_plots=not args.no_plots,
plot_name_prefix=f"batch_{args.method}"
)
# find the CSV files
csv_files = find_csv_files(args.input_dir)
if not csv_files:
print("No CSV files found")
return 1
# run the batch feature selection
results = batch_feature_selection(
csv_files=csv_files,
base_config=base_config,
output_base_dir=args.output_dir,
max_workers=args.max_workers,
parallel=args.parallel
)
# save the results
results_file = Path(args.output_dir) / args.results_file
save_batch_results(results, results_file)
# print the summary
print_batch_summary(results)
successful = sum(1 for r in results if r['success'])
return 0 if successful > 0 else 1
except Exception as e:
print(f"批量处理失败: {e}")
import traceback
traceback.print_exc()
return 1
def example_usage():
"""
Print a usage guide.
"""
print("=" * 80)
print("Batch feature-selection tool - usage guide")
print("=" * 80)
print("\n1. Column-range selection:")
print(" Several column-selection styles are supported:")
print(" - index range: '1:10' selects columns 1 through 9 (half-open)")
print(" - mixed spec: '2,4,7:9' selects columns 2, 4, 7 and 8")
print(" - auto-detect: 'auto' picks the numeric columns as spectral columns")
print(" - name list: 'wavelength_400 wavelength_410 wavelength_420'")
print("\n2. Command-line examples:")
print(" # index range")
print(" python batch_feature_selection.py input_dir output_dir --label_column 0 --spectral_columns 1:50")
print("")
print(" # mixed spec")
print(" python batch_feature_selection.py input_dir output_dir --label_column concentration --spectral_columns 2,4,7:9")
print("")
print(" # auto-detect the spectral columns")
print(" python batch_feature_selection.py input_dir output_dir --label_column Label --spectral_columns auto")
print("\n3. Python代码使用示例:")
print("""
from batch_feature_selection import batch_feature_selection, create_example_batch_config, find_csv_files
# 查找CSV文件
csv_files = find_csv_files('your/data/directory')
# 创建配置
base_config = create_example_batch_config()
base_config.label_column = 'concentration' # 标签列名
base_config.spectral_columns = "5:25" # 列5到25作为光谱列
# 执行批量处理
results = batch_feature_selection(
csv_files=csv_files,
base_config=base_config,
output_base_dir='output/directory',
parallel=True
)
""")
print("\n4. 支持的特征选择方法:")
methods = ['CARS', 'Lars', 'Uve', 'Spa', 'GA', 'ReliefF', 'RandomFrog', 'SiPLS']
for method in methods:
print(f" - {method}")
print("\n5. 方法参数配置示例:")
print("""
# CARS方法
config.method_params = {'N': 50, 'f': 20, 'cv': 10}
# UVE方法
config.method_params = {'ncomp': 20, 'cv': 5}
# SPA方法
config.method_params = {'m_min': 2, 'm_max': 50, 'autoscaling': 1}
""")
print("=" * 80)
# find the CSV files (raw string: backslashes in Windows paths must not be escapes)
csv_files = find_csv_files(r"E:\code\spectronon\single_classsfication\data")
# all available feature-selection methods and their parameters
methods_config = [
{
'method': 'Cars',
'method_params': {'N': 50, 'f': 20, 'cv': 10},
'description': 'Competitive Adaptive Reweighted Sampling'
},
{
'method': 'Uve',
'method_params': {'ncomp': 20, 'cv': 5},
'description': 'Uninformative Variable Elimination'
},
{
'method': 'Spa',
'method_params': {'m_min': 2, 'm_max': 50, 'autoscaling': 1},
'description': 'Successive Projections Algorithm'
},
{
'method': 'GA',
'method_params': {'population_size': 10},
'description': 'Genetic Algorithm'
},
{
'method': 'ReliefF',
'method_params': {'n_neighbors': 20, 'n_features_to_keep': 20},
'description': 'ReliefF Algorithm'
},
{
'method': 'RandomFrog',
'method_params': {'n_frogs': 50, 'n_memeplexes': 5, 'n_evolution_steps': 10, 'n_shuffle_iterations': 10, 'cv': 5},
'description': 'Random Frog Leaping Algorithm'
},
{
'method': 'SiPLS',
'method_params': {'n_intervals_list': [10, 15, 20]},
'description': 'Synergy Interval Partial Least Squares'
}
]
print("=" * 80)
print("开始批量特征选择 - 使用所有可用方法")
print(f"找到 {len(csv_files)} 个CSV文件待处理")
print(f"将使用 {len(methods_config)} 种特征选择方法")
print("=" * 80)
all_results = {}
# run the batch once per method
for i, method_cfg in enumerate(methods_config, 1):
method_name = method_cfg['method']
description = method_cfg['description']
print(f"\n{'='*60}")
print(f"Method {i}/{len(methods_config)}: {method_name}")
print(f"Description: {description}")
print(f"{'='*60}")
try:
# build the configuration for this method
method_config = create_example_batch_config()
method_config.method = method_name
method_config.method_params = method_cfg['method_params']
method_config.label_column = 'Label'  # label column name
method_config.spectral_columns = "1:"  # column 1 to the end as spectral columns
method_config.plot_name_prefix = f"{method_name.lower()}_batch_fs"
# run the batch
method_results = batch_feature_selection(
csv_files=csv_files,
base_config=method_config,
output_base_dir=f'E:\\code\\spectronon\\single_classsfication\\Feature_Selection_method\\directory\\{method_name.lower()}_results',
parallel=True
)
all_results[method_name] = {
'results': method_results,
'description': description,
'config': method_cfg
}
print(f"{method_name} 方法处理完成")
except Exception as e:
print(f"{method_name} 方法处理失败: {str(e)}")
all_results[method_name] = {
'error': str(e),
'description': description,
'config': method_cfg
}
# print the overall summary
print(f"\n{'='*80}")
print("Batch feature selection finished - summary")
print(f"{'='*80}")
successful_methods = []
failed_methods = []
for method_name, result in all_results.items():
if 'error' in result:
failed_methods.append(f"{method_name}: {result['error']}")
print(f"{method_name}: 失败 - {result['error']}")
else:
successful_methods.append(method_name)
print(f"{method_name}: 成功")
print(f"\n总计: {len(successful_methods)}/{len(methods_config)} 种方法成功处理")
print(f"成功的方法: {', '.join(successful_methods)}")
if failed_methods:
print(f"失败的方法: {len(failed_methods)}")
for failed in failed_methods:
print(f" - {failed}")
print(f"\n结果文件保存在: E:\\code\\spectronon\\single_classsfication\\Feature_Selection_method\\directory\\")
print("每个方法都有独立的子目录存储结果")
# If this script is run directly, show the usage guide
# if __name__ == "__main__":
# import sys
# if len(sys.argv) == 1:
# example_usage()
# else:
# # 运行主函数进行批量处理
# exit(main())
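For a one-off run without the batch wrapper, a minimal hedged sketch of the single-file API (the path, column names and output directory below are placeholders, not values from this repository; the CSV must exist because FeatureSelectionConfig validates the path):

# Hypothetical single-file usage sketch.
from feture_select import FeatureSelectionConfig, select_features_from_csv

cfg = FeatureSelectionConfig(
    method="Cars",
    method_params={"N": 50, "f": 20, "cv": 10},
    csv_file_path="data/sample.csv",          # placeholder path
    label_column="Label",                     # placeholder label column
    spectral_columns=["400", "410", "420"],   # placeholder band columns
    output_csv=True,
    output_dir="out",
)
X_sel, y, cols = select_features_from_csv(cfg)
print("selected columns:", cols)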


@ -0,0 +1,594 @@
import pandas as pd
import numpy as np
from Feature_Selection_method.Lar import Lar
from Feature_Selection_method.Spa import SPA
from Feature_Selection_method.Uve import UVE
from Feature_Selection_method.Cars import CARS_Cloud
from Feature_Selection_method.GA import GA
from Feature_Selection_method.ReliefF import ReliefF
from Feature_Selection_method.random_fog import shuffled_frog_leaping_selection
from Feature_Selection_method.sipls import sipls_feature_selection
from sklearn.model_selection import train_test_split
import os
import matplotlib.pyplot as plt
from typing import Optional, Union, List, Tuple
from dataclasses import dataclass, field
def _get_x_axis_values(feature_names: List[str]) -> Tuple[Optional[np.ndarray], str]:
"""
Extract x-axis values (usually wavelengths) from the feature names.
Args:
feature_names: list of feature names
Returns:
(x_values, x_label): the x-axis values and label, or (None, "") when extraction fails
"""
if not feature_names:
return None, ""
# try to extract numeric values from the column names
x_values = []
for name in feature_names:
try:
# try converting the name to a float directly
if isinstance(name, (int, float)):
x_values.append(float(name))
elif isinstance(name, str):
# otherwise pull the number out of the string,
# handling formats like "400.5", "Band_400", "Wavelength_400.5nm"
import re
# look for a floating-point pattern
match = re.search(r'(\d+\.?\d*)', str(name))
if match:
x_values.append(float(match.group(1)))
else:
# no number found -> give up
return None, ""
else:
return None, ""
except (ValueError, TypeError):
return None, ""
# all values must be unique (no duplicated wavelengths)
if len(set(x_values)) != len(x_values):
return None, ""
# sanity-check the range (assuming nm, roughly 200-2500 nm)
x_array = np.array(x_values)
if np.min(x_array) < 200 or np.max(x_array) > 2500:
return None, ""
# label the axis
x_label = "Wavelength (nm)"
return x_array, x_label
def plot_feature_selection_results(X: Union[pd.DataFrame, np.ndarray],
selected_indices: Union[List[int], np.ndarray],
method_name: str,
save_path: Optional[str] = None,
figsize: Tuple[int, int] = (12, 6)) -> plt.Figure:
"""
Plot the feature-selection result.
Args:
X: feature matrix (n_samples, n_features)
selected_indices: indices of the selected features
method_name: name of the feature-selection method
save_path: where to save the figure (None = do not save)
figsize: figure size
Returns:
matplotlib Figure object
"""
# coerce to a numpy array
if isinstance(X, pd.DataFrame):
X_array = X.values
feature_names = X.columns.tolist()
else:
X_array = X
feature_names = [f"Feature_{i}" for i in range(X.shape[1])]
# average spectrum
mean_spectrum = np.mean(X_array, axis=0)
n_features = X_array.shape[1]
# build the x axis, preferring wavelengths over bare indices
x_values, x_label = _get_x_axis_values(feature_names)
if x_values is None:
# fall back to feature indices when wavelengths cannot be extracted
x_values = np.arange(n_features)
x_label = "Feature Index"
# create the figure
fig, ax = plt.subplots(figsize=figsize)
# plot the mean spectrum
ax.plot(x_values, mean_spectrum, 'b-', linewidth=1.5, alpha=0.8, label='Mean Spectrum')
# mark the selected features
if len(selected_indices) > 0:
# make sure selected_indices is an integer array
selected_indices = np.asarray(selected_indices, dtype=int)
# drop out-of-range indices
valid_indices = selected_indices[(selected_indices >= 0) & (selected_indices < len(x_values))]
if len(valid_indices) > 0:
selected_x = x_values[valid_indices]
selected_y = mean_spectrum[valid_indices]
ax.scatter(selected_x, selected_y, color='red', s=60, alpha=0.9,
edgecolors='darkred', linewidth=1.5, label='Selected Features', zorder=5)
# annotate how many features were selected
ax.text(0.02, 0.98, f'Selected: {len(selected_indices)}/{n_features} features',
transform=ax.transAxes, fontsize=10, verticalalignment='top',
bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))
# title and axis labels
ax.set_title(f'Feature Selection Results - {method_name}', fontsize=14, fontweight='bold')
ax.set_xlabel(x_label, fontsize=12)
ax.set_ylabel('Intensity', fontsize=12)
# grid and legend
ax.grid(True, alpha=0.3)
ax.legend(loc='upper right', fontsize=10)
# tidy the layout
plt.tight_layout()
# save the figure
if save_path:
plt.savefig(save_path, dpi=300, bbox_inches='tight')
print(f"Visualization saved to: {save_path}")
return fig
@dataclass
class FeatureSelectionConfig:
"""特征选择配置类"""
# CSV input
csv_file_path: Optional[str] = None
label_column: Optional[str] = None
spectral_columns: Optional[List[str]] = None
# feature-selection method
method: str = "None"
method_params: dict = field(default_factory=dict)
# output
output_csv: bool = False
output_dir: str = ""
output_filename: str = "selected_features"
# visualisation
save_plots: bool = True
plot_name_prefix: str = ""
plot_dir: Optional[str] = None  # plot directory; falls back to output_dir when None
def __post_init__(self):
"""参数校验和默认值设置"""
if self.csv_file_path and not os.path.exists(self.csv_file_path):
raise FileNotFoundError(f"CSV文件不存在: {self.csv_file_path}")
if self.csv_file_path and not self.label_column:
raise ValueError("指定CSV文件时必须提供标签列名(label_column)")
if self.csv_file_path and not self.spectral_columns:
raise ValueError("指定CSV文件时必须提供光谱列名列表(spectral_columns)")
# 设置默认的方法参数
self._set_default_method_params()
def _set_default_method_params(self):
"""根据方法设置默认参数"""
if self.method == "Cars":
self.method_params.setdefault('N', 50)
self.method_params.setdefault('f', 20)
self.method_params.setdefault('cv', 10)
elif self.method == "Uve":
self.method_params.setdefault('ncomp', 20)
self.method_params.setdefault('cv', 5)
elif self.method == "Spa":
self.method_params.setdefault('m_min', 2)
self.method_params.setdefault('m_max', 50)
self.method_params.setdefault('autoscaling', 1)
elif self.method == "GA":
self.method_params.setdefault('population_size', 10)
elif self.method == "ReliefF":
self.method_params.setdefault('n_neighbors', 20)
self.method_params.setdefault('n_features_to_keep', 20)
elif self.method == "RandomFrog":
self.method_params.setdefault('n_frogs', 50)
self.method_params.setdefault('n_memeplexes', 5)
self.method_params.setdefault('n_evolution_steps', 10)
self.method_params.setdefault('n_shuffle_iterations', 10)
self.method_params.setdefault('cv', 5)
elif self.method == "SiPLS":
self.method_params.setdefault('n_intervals_list', [10, 15, 20])
self.method_params.setdefault('n_combinations_list', [2, 3, 4])
self.method_params.setdefault('max_components', 15)
self.method_params.setdefault('cv_folds', 5)
class SpectrumFeatureSelector:
"""光谱特征选择器"""
def __init__(self, config: FeatureSelectionConfig):
self.config = config
def load_csv_data(self) -> Tuple[pd.DataFrame, np.ndarray]:
"""从CSV文件加载数据"""
if not self.config.csv_file_path:
raise ValueError("未指定CSV文件路径")
df = pd.read_csv(self.config.csv_file_path)
# 验证列是否存在
if self.config.label_column not in df.columns:
raise ValueError(f"标签列 '{self.config.label_column}' 不存在于CSV文件中")
missing_cols = [col for col in self.config.spectral_columns if col not in df.columns]
if missing_cols:
raise ValueError(f"以下光谱列不存在于CSV文件中: {missing_cols}")
# 提取特征和标签
X = df[self.config.spectral_columns]
y = df[self.config.label_column].values
return X, y
def save_selected_features_csv(self, X_selected: pd.DataFrame, y: np.ndarray,
selected_columns: Union[List[str], np.ndarray]):
"""保存选定的特征到CSV文件"""
if not self.config.output_csv:
return
os.makedirs(self.config.output_dir, exist_ok=True)
# 创建结果DataFrame
if isinstance(selected_columns, np.ndarray):
selected_col_names = [f"feature_{i}" for i in selected_columns]
else:
selected_col_names = selected_columns
result_df = pd.DataFrame(X_selected.values, columns=selected_col_names)
result_df[self.config.label_column] = y
output_path = os.path.join(self.config.output_dir,
f"{self.config.output_filename}.csv")
result_df.to_csv(output_path, index=False)
print(f"Selected features saved to: {output_path}")
def plot_feature_selection(self, X: pd.DataFrame,
selected_indices: Union[List[int], np.ndarray]) -> Optional[plt.Figure]:
"""绘制特征选择结果可视化"""
if not self.config.save_plots:
return None
# 确定保存目录
plot_dir = self.config.plot_dir if self.config.plot_dir else self.config.output_dir
if not plot_dir:
return None
os.makedirs(plot_dir, exist_ok=True)
# 生成文件名
filename = f"{self.config.plot_name_prefix}_{self.config.method}_feature_selection.png"
save_path = os.path.join(plot_dir, filename)
# 绘制可视化图
fig = plot_feature_selection_results(
X=X,
selected_indices=selected_indices,
method_name=self.config.method,
save_path=save_path
)
return fig
def _convert_to_indices(self, X: pd.DataFrame, selected_columns) -> List[int]:
"""
Convert selected_columns into index positions of the original DataFrame X.
Args:
X: the original DataFrame
selected_columns: the selection; an index array, a list of column names, etc.
Returns:
List of indices
"""
try:
# unwrap pandas Index/Series objects
if hasattr(selected_columns, 'tolist'): # pandas Index or Series
selected_columns = selected_columns.tolist()
if isinstance(selected_columns, np.ndarray):
# a numpy array already holds positional indices
return selected_columns.tolist()
elif isinstance(selected_columns, list) and len(selected_columns) > 0:
if isinstance(selected_columns[0], str):
# a list of column names: map each to its position
indices = []
for col in selected_columns:
try:
# exact match first
idx = X.columns.get_loc(col)
indices.append(idx)
except KeyError:
# on a miss, try an approximate numeric match (handles float precision)
try:
target_value = float(col)
# 找到最接近的列名
best_match = None
best_diff = float('inf')
best_idx = None
for i, col_name in enumerate(X.columns):
try:
col_value = float(col_name)
diff = abs(col_value - target_value)
if diff < best_diff:
best_diff = diff
best_match = col_name
best_idx = i
except (ValueError, TypeError):
continue
if best_match is not None and best_diff < 1.0:  # tolerate differences below 1.0
print(f"Approximate match: '{col}' -> '{best_match}' (diff: {best_diff:.3f})")
indices.append(best_idx)
else:
print(f"Warning: No suitable match found for column '{col}' in DataFrame columns")
continue
except (ValueError, TypeError):
print(f"Warning: Cannot parse column name '{col}' as numeric")
continue
return indices
else:
# a list of numbers: use them directly as indices
return [int(idx) for idx in selected_columns]
else:
return []
except Exception as e:
print(f"Error converting selected_columns to indices: {e}")
return []
def select_features(self, X: Optional[pd.DataFrame] = None, y: Optional[np.ndarray] = None,
column_names: Optional[List[str]] = None) -> Tuple[pd.DataFrame, np.ndarray, Union[List[str], np.ndarray]]:
"""
Run the feature selection.
Args:
X: feature data; loaded from the CSV file when None
y: labels; loaded from the CSV file when None
column_names: column names, for numpy-array input
Returns:
X_selected: the selected feature data
y: the labels
selected_columns: the selected column names or indices
"""
# load from the CSV file when no data was passed in
if X is None or y is None:
X, y = self.load_csv_data()
# make sure X is a DataFrame
if isinstance(X, np.ndarray):
if column_names is not None:
X = pd.DataFrame(X, columns=column_names)
else:
X = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
# run the feature selection (method_params was previously not forwarded,
# so configured parameters were silently ignored; pass it through)
X_selected, y_selected, selected_columns = SpctrumFeatureSelcet(
method=self.config.method,
X=X,
y=y,
name=self.config.plot_name_prefix,
result_dir=self.config.output_dir if self.config.save_plots else '',
column_names=None,  # X is already a DataFrame
method_params=self.config.method_params
)
# save the result to CSV (when configured)
self.save_selected_features_csv(X_selected, y_selected, selected_columns)
# draw the visualisation (when configured)
if self.config.save_plots:
# map selected_columns back to index positions in the original X;
# selected_columns refers to X_selected, so locate those columns in X
selected_indices = self._convert_to_indices(X, selected_columns)
if len(selected_indices) > 0:
self.plot_feature_selection(X, selected_indices)
else:
print(f"Warning: No valid indices found for plotting. selected_columns: {selected_columns}")
print(f"Available columns in X: {list(X.columns[:5])}...") # 显示前5个列名用于调试
return X_selected, y_selected, selected_columns
def SpctrumFeatureSelcet(method, X, y, name='', result_dir='', column_names=None, method_params=None):
"""
Core feature-selection dispatcher (original business logic unchanged).
:param method: wavelength-selection / dimensionality-reduction method; one of Cars, Lars, Uve, Spa, GA, ReliefF, RandomFrog, SiPLS.
:param X: spectra, a pandas DataFrame or numpy array (n_samples, n_features).
:param y: labels for the spectra (n_samples,).
:param name: file name for the result figure.
:param result_dir: directory where results are saved.
:param column_names: column names, required when X is a numpy array.
:param method_params: dict of method-specific parameters.
:return:
- X_Feature: data after selection / reduction (n_samples, n_features).
- y: the matching labels.
- selected_columns: the selected column names or indices.
"""
if method_params is None:
method_params = {}
global X_Feature
# convert the input to a DataFrame when necessary
if isinstance(X, np.ndarray):
if column_names is None:
column_names = [f"{i}" for i in range(X.shape[1])]  # default column names
X_df = pd.DataFrame(X, columns=column_names)
else:
X_df = X
# dispatch on the chosen method
if method == "None":
X_Feature = X_df
selected_columns = X_df.columns
elif method == "Cars":
save_path = os.path.join(result_dir, f"{name}_cars.png") if result_dir else None
# call CARS_Cloud with the configured parameters
N = method_params.get('N', 50)
f = method_params.get('f', 20)
cv = method_params.get('cv', 10)
Featuresecletidx = CARS_Cloud(X_df.values, y, N=N, f=f, cv=cv,
save_fig=bool(save_path), save_path=save_path)
Featuresecletidx = Featuresecletidx.astype(int)
X_Feature = X_df.iloc[:, Featuresecletidx]
selected_columns = Featuresecletidx
elif method == "Lars":
Featuresecletidx = Lar(X_df.values, y)
X_Feature = X_df.iloc[:, Featuresecletidx]
selected_columns = X_df.columns[Featuresecletidx]
elif method == "Uve":
ncomp = method_params.get('ncomp', 20)
cv = method_params.get('cv', 5)
uve = UVE(X_df.values, y, ncomp)
uve.calcCriteria()
uve.evalCriteria(cv=cv)
Featuresecletidx = uve.cutFeature()  # indices of the selected features
X_Feature = X_df.iloc[:, Featuresecletidx]
selected_columns = X_df.columns[Featuresecletidx]
elif method == "Spa":
save_path = os.path.join(result_dir, f"{name}_spa.png") if result_dir else None
Xcal, Xval, ycal, yval = train_test_split(X_df, y, test_size=0.3)
m_min = method_params.get('m_min', 2)
m_max = method_params.get('m_max', 50)
autoscaling = method_params.get('autoscaling', 1)
Featuresecletidx, var_sel_phase2 = SPA().spa(
Xcal, ycal, m_min=m_min, m_max=m_max, Xval=Xval, yval=yval,
autoscaling=autoscaling, save_path=save_path)
X_Feature = X_df.iloc[:, Featuresecletidx]
selected_columns = X_df.columns[Featuresecletidx]
elif method == "GA":
population_size = method_params.get('population_size', 10)
Featuresecletidx = GA(X_df.values, y, population_size=population_size)  # keyword avoids passing it as n_generations
X_Feature = X_df.iloc[:, Featuresecletidx]
selected_columns = X_df.columns[Featuresecletidx]
elif method == "ReliefF":
n_neighbors = method_params.get('n_neighbors', 20)
n_features_to_keep = method_params.get('n_features_to_keep', 20)
relieff = ReliefF(n_neighbors=n_neighbors, n_features_to_keep=n_features_to_keep)
Featuresecletidx = relieff.fit(X_df.values, y)
X_Feature = X_df.iloc[:, Featuresecletidx]
selected_columns = X_df.columns[Featuresecletidx]
elif method == "RandomFrog":
n_frogs = method_params.get('n_frogs', 50)
n_memeplexes = method_params.get('n_memeplexes', 5)
n_evolution_steps = method_params.get('n_evolution_steps', 10)
n_shuffle_iterations = method_params.get('n_shuffle_iterations', 10)
cv = method_params.get('cv', 5)
Featuresecletidx = shuffled_frog_leaping_selection(
X_df.values, y,
n_frogs=n_frogs,
n_memeplexes=n_memeplexes,
n_evolution_steps=n_evolution_steps,
n_shuffle_iterations=n_shuffle_iterations,
cv=cv
)
X_Feature = X_df.iloc[:, Featuresecletidx]
selected_columns = X_df.columns[Featuresecletidx]
elif method == "SiPLS":
n_intervals_list = method_params.get('n_intervals_list', [10, 15, 20])
n_combinations_list = method_params.get('n_combinations_list', [2, 3, 4])
max_components = method_params.get('max_components', 15)
cv_folds = method_params.get('cv_folds', 5)
result = sipls_feature_selection(
X_df.values, y,
n_intervals_list=n_intervals_list,
n_combinations_list=n_combinations_list,
max_components=max_components,
cv_folds=cv_folds
)
if result and 'selected_wavelengths' in result:
Featuresecletidx = result['selected_wavelengths']
X_Feature = X_df.iloc[:, Featuresecletidx]
selected_columns = X_df.columns[Featuresecletidx]
else:
raise ValueError("SiPLS算法未能找到有效的特征选择结果")
else:
raise ValueError(f"不支持的特征选择方法: {method}。支持的方法包括: None, Cars, Lars, Uve, Spa, GA, ReliefF, RandomFrog, SiPLS")
return X_Feature, y, selected_columns  # selected data, labels and column names
# convenience wrappers for backwards compatibility and simpler use
def select_features_from_csv(config: FeatureSelectionConfig) -> Tuple[pd.DataFrame, np.ndarray, Union[List[str], np.ndarray]]:
"""
Main entry point for feature selection from a CSV file.
Args:
config: feature-selection configuration object
Returns:
X_selected: the selected feature data
y: the labels
selected_columns: the selected column names or indices
"""
selector = SpectrumFeatureSelector(config)
return selector.select_features()
def select_features_from_data(X: pd.DataFrame, y: np.ndarray, method: str,
method_params: Optional[dict] = None,
name: str = '', result_dir: str = '',
column_names: Optional[List[str]] = None) -> Tuple[pd.DataFrame, np.ndarray, Union[List[str], np.ndarray]]:
"""
Convenience function for feature selection on in-memory data.
Args:
X: feature data
y: labels
method: feature-selection method
method_params: method parameters
name: output file-name prefix
result_dir: output directory
column_names: column names
Returns:
X_selected: the selected feature data
y: the labels
selected_columns: the selected column names or indices
"""
config = FeatureSelectionConfig(
method=method,
method_params=method_params or {},
output_csv=False,  # no CSV output for in-memory input
save_plots=bool(result_dir),
plot_name_prefix=name
)
selector = SpectrumFeatureSelector(config)
return selector.select_features(X=X, y=y, column_names=column_names)
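A hedged end-to-end sketch of the in-memory entry point (synthetic data; the ReliefF parameters are illustrative assumptions):

# Hypothetical usage sketch for select_features_from_data.
import pandas as pd
from sklearn.datasets import make_classification
from feture_select import select_features_from_data  # module name as committed

X, y = make_classification(n_samples=80, n_features=40, n_informative=8, random_state=0)
X_sel, y_out, cols = select_features_from_data(
    pd.DataFrame(X), y, method="ReliefF",
    method_params={"n_neighbors": 10, "n_features_to_keep": 15})
print(X_sel.shape, list(cols))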


@ -0,0 +1,292 @@
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.base import clone
import copy
class ShuffledFrogLeaping:
"""
Shuffled Frog Leaping Algorithm (SFLA) for feature selection.
Outline:
1. Split the frog population into several memeplexes.
2. Run a local search / evolution inside each memeplex.
3. Periodically shuffle all frogs for global information exchange.
4. Repeat until the stopping condition is met.
"""
def __init__(self, n_frogs=50, n_memeplexes=5, n_evolution_steps=10,
n_shuffle_iterations=10, classifier=None, cv=5):
"""
Initialise the SFLA parameters.
Parameters:
n_frogs: population size
n_memeplexes: number of memeplexes
n_evolution_steps: evolution steps per memeplex
n_shuffle_iterations: number of shuffle iterations
classifier: classifier used to score feature subsets
cv: number of cross-validation folds
"""
self.n_frogs = n_frogs
self.n_memeplexes = n_memeplexes
self.n_evolution_steps = n_evolution_steps
self.n_shuffle_iterations = n_shuffle_iterations
self.classifier = classifier or RandomForestClassifier(random_state=42, n_estimators=50)
self.cv = cv
# internal state
self.n_features = None
self.frogs = None  # population; each frog is a binary vector
self.fitness_values = None
self.best_frog = None
self.best_fitness = -np.inf
self.selected_features = None
def _initialize_population(self):
"""初始化青蛙种群"""
self.frogs = []
for _ in range(self.n_frogs):
# 随机初始化二进制向量1表示选择该特征0表示不选择
frog = np.random.randint(0, 2, self.n_features)
self.frogs.append(frog)
self.frogs = np.array(self.frogs)
def _evaluate_fitness(self, X, y):
"""评估所有青蛙的适应度"""
self.fitness_values = []
for frog in self.frogs:
fitness = self._calculate_fitness(frog, X, y)
self.fitness_values.append(fitness)
# 更新全局最优
if fitness > self.best_fitness:
self.best_fitness = fitness
self.best_frog = frog.copy()
self.fitness_values = np.array(self.fitness_values)
def _calculate_fitness(self, frog, X, y):
"""计算单个青蛙的适应度"""
selected_features = np.where(frog == 1)[0]
# 如果没有选择任何特征,返回最低适应度
if len(selected_features) == 0:
return 0.0
# 使用选择的特征进行交叉验证
X_selected = X[:, selected_features]
try:
scores = cross_val_score(clone(self.classifier), X_selected, y, cv=self.cv)
return np.mean(scores)
except:
# 如果交叉验证失败,返回低适应度
return 0.0
def _divide_into_memeplexes(self):
"""将青蛙按适应度排序并分成小组"""
# 按适应度降序排序
sorted_indices = np.argsort(self.fitness_values)[::-1]
self.frogs = self.frogs[sorted_indices]
self.fitness_values = self.fitness_values[sorted_indices]
# 分成小组
memeplexes = []
frogs_per_memeplex = self.n_frogs // self.n_memeplexes
for i in range(self.n_memeplexes):
start_idx = i * frogs_per_memeplex
if i == self.n_memeplexes - 1:
# the last memeplex takes all remaining frogs
end_idx = self.n_frogs
else:
end_idx = (i + 1) * frogs_per_memeplex
memeplex = {
'frogs': self.frogs[start_idx:end_idx].copy(),
'fitness': self.fitness_values[start_idx:end_idx].copy()
}
memeplexes.append(memeplex)
return memeplexes
def _evolve_memeplex(self, memeplex, X, y):
"""进化单个小组"""
frogs = memeplex['frogs']
fitness = memeplex['fitness']
# 找出小组中的最好和最坏青蛙
best_idx = np.argmax(fitness)
worst_idx = np.argmin(fitness)
best_frog = frogs[best_idx]
worst_frog = frogs[worst_idx]
# 对最坏的青蛙进行进化
for step in range(self.n_evolution_steps):
# 生成新的青蛙: worst_frog + rand() * (best_frog - worst_frog)
rand = np.random.random(self.n_features)
new_frog = worst_frog + rand * (best_frog - worst_frog)
# 二进制化大于0.5的为1否则为0
new_frog = (new_frog > 0.5).astype(int)
# 确保至少选择一个特征
if np.sum(new_frog) == 0:
new_frog[np.random.randint(self.n_features)] = 1
# score the candidate
new_fitness = self._calculate_fitness(new_frog, X, y)
# replace the worst frog when the candidate improves on it
if new_fitness > fitness[worst_idx]:
frogs[worst_idx] = new_frog
fitness[worst_idx] = new_fitness
# update the memeplex best if needed
if new_fitness > fitness[best_idx]:
best_idx = worst_idx
best_frog = new_frog
# re-locate the worst frog
worst_idx = np.argmin(fitness)
worst_frog = frogs[worst_idx]
else:
# no improvement: try a completely random frog instead
new_frog = np.random.randint(0, 2, self.n_features)
if np.sum(new_frog) == 0:
new_frog[np.random.randint(self.n_features)] = 1
new_fitness = self._calculate_fitness(new_frog, X, y)
if new_fitness > fitness[worst_idx]:
frogs[worst_idx] = new_frog
fitness[worst_idx] = new_fitness
return frogs, fitness
def fit(self, X, y):
"""
Run SFLA feature selection.
Parameters:
X: feature matrix (n_samples, n_features)
y: label vector (n_samples,)
Returns:
selected_features: list of selected feature indices
"""
self.n_features = X.shape[1]
# initialise the population
self._initialize_population()
# initial scoring
self._evaluate_fitness(X, y)
# main loop
for iteration in range(self.n_shuffle_iterations):
# split the frogs into memeplexes
memeplexes = self._divide_into_memeplexes()
# evolve each memeplex
evolved_frogs = []
evolved_fitness = []
for memeplex in memeplexes:
evolved_frog, evolved_fit = self._evolve_memeplex(memeplex, X, y)
evolved_frogs.extend(evolved_frog)
evolved_fitness.extend(evolved_fit)
# rebuild the population from the evolved memeplexes
self.frogs = np.array(evolved_frogs)
self.fitness_values = np.array(evolved_fitness)
# re-score everything (keeps the global best consistent)
self._evaluate_fitness(X, y)
# return the best solution found
self.selected_features = np.where(self.best_frog == 1)[0]
return self.selected_features.tolist()
def get_feature_importance(self):
"""获取特征选择结果的统计信息"""
if self.selected_features is None:
raise ValueError("请先运行 fit 方法")
n_selected = len(self.selected_features)
selection_ratio = n_selected / self.n_features
return {
'selected_features': self.selected_features,
'n_selected': n_selected,
'n_total': self.n_features,
'selection_ratio': selection_ratio,
'best_fitness': self.best_fitness
}
def shuffled_frog_leaping_selection(X, y, n_frogs=50, n_memeplexes=5,
n_evolution_steps=10, n_shuffle_iterations=10,
classifier=None, cv=5):
"""
Feature selection with the shuffled frog leaping algorithm.
Parameters:
X: feature matrix (n_samples, n_features)
y: label vector (n_samples,)
n_frogs: population size
n_memeplexes: number of memeplexes
n_evolution_steps: evolution steps per memeplex
n_shuffle_iterations: number of shuffle iterations
classifier: classifier used to score feature subsets
cv: number of cross-validation folds
Returns:
selected_features: list of selected feature indices
"""
sfla = ShuffledFrogLeaping(
n_frogs=n_frogs,
n_memeplexes=n_memeplexes,
n_evolution_steps=n_evolution_steps,
n_shuffle_iterations=n_shuffle_iterations,
classifier=classifier,
cv=cv
)
return sfla.fit(X, y)
# usage example
if __name__ == "__main__":
# generate demo data
from sklearn.datasets import make_classification
X, y = make_classification(
n_samples=200,
n_features=50,
n_informative=10,
n_redundant=10,
n_clusters_per_class=1,
random_state=42
)
print("原始特征数量:", X.shape[1])
# 使用随机蛙跳算法进行特征选择
selected_features = shuffled_frog_leaping_selection(
X, y,
n_frogs=30,
n_memeplexes=3,
n_evolution_steps=5,
n_shuffle_iterations=5
)
print("选择的特征数量:", len(selected_features))
print("选择的特征索引:", selected_features)
# 计算选择率
selection_ratio = len(selected_features) / X.shape[1]
print(".2f")


@ -0,0 +1,271 @@
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from itertools import combinations
import matplotlib.pyplot as plt
def synergy_interval_pls(X, y, n_intervals=20, n_combinations=2, max_components=15, cv_folds=5):
"""
Synergy Interval PLS (SiPLS) feature selection.
Parameters:
X: spectral matrix (n_samples, n_wavelengths)
y: concentration / property vector (n_samples,)
n_intervals: number of equal-width intervals to split the spectrum into
n_combinations: number of intervals per combination (typically 2-4)
max_components: maximum number of PLS components
cv_folds: number of cross-validation folds
Returns:
selected_wavelengths: wavelength indices of the best interval combination
best_rmsecv: RMSECV of the best combination
best_n_components: optimal number of PLS components
"""
n_samples, n_wavelengths = X.shape
# split the spectrum into equal-width intervals
interval_size = n_wavelengths // n_intervals
intervals = []
for i in range(n_intervals):
start_idx = i * interval_size
if i == n_intervals - 1:
# the last interval takes the remaining wavelengths
end_idx = n_wavelengths
else:
end_idx = (i + 1) * interval_size
intervals.append((start_idx, end_idx))
print(f"Split {n_wavelengths} wavelengths into {n_intervals} intervals:")
for i, (start, end) in enumerate(intervals):
print(f" interval {i+1}: wavelengths {start}-{end-1} (width: {end-start})")
# enumerate all interval combinations
interval_combinations = list(combinations(range(n_intervals), n_combinations))
print(f"\n{len(interval_combinations)} combinations of {n_combinations} intervals in total")
best_rmsecv = float('inf')
best_intervals = None
best_n_components = None
results = []
# evaluate every combination
for combo_idx, combo in enumerate(interval_combinations):
if (combo_idx + 1) % 50 == 0:
print(f"Processing combination {combo_idx + 1}/{len(interval_combinations)}")
# concatenate the spectra of the chosen intervals
selected_wavelengths = []
for interval_idx in combo:
start_idx, end_idx = intervals[interval_idx]
selected_wavelengths.extend(range(start_idx, end_idx))
X_selected = X[:, selected_wavelengths]
# cross-validate over the candidate numbers of components
kf = KFold(n_splits=cv_folds, shuffle=True, random_state=42)
rmse_results = []
for n_comp in range(1, min(max_components + 1, X_selected.shape[1] + 1)):
rmse_scores = []
for train_idx, test_idx in kf.split(X_selected):
X_train, X_test = X_selected[train_idx], X_selected[test_idx]
y_train, y_test = y[train_idx], y[test_idx]
pls = PLSRegression(n_components=n_comp)
pls.fit(X_train, y_train)
y_pred = pls.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
rmse_scores.append(rmse)
mean_rmse = np.mean(rmse_scores)
rmse_results.append(mean_rmse)
# best number of components and RMSE for this combination
min_rmse_idx = np.argmin(rmse_results)
min_rmse = rmse_results[min_rmse_idx]
best_comp = min_rmse_idx + 1
results.append({
'intervals': combo,
'rmsecv': min_rmse,
'n_components': best_comp,
'wavelengths': selected_wavelengths
})
# update the global best
if min_rmse < best_rmsecv:
best_rmsecv = min_rmse
best_intervals = combo
best_n_components = best_comp
# rebuild the wavelength indices of the best interval combination
# (the original summary printed the wavelength count of the last
# combination evaluated, not of the best one)
selected_wavelengths = []
for interval_idx in best_intervals:
start_idx, end_idx = intervals[interval_idx]
selected_wavelengths.extend(range(start_idx, end_idx))
print("Best result:")
print(f" interval combination: {best_intervals}")
print(f" RMSECV: {best_rmsecv:.6f}")
print(f" number of components: {best_n_components}")
print(f" selected wavelengths: {len(selected_wavelengths)}")
return selected_wavelengths, best_rmsecv, best_n_components
def sipls_feature_selection(X, y, n_intervals_list=[10, 15, 20], n_combinations_list=[2, 3, 4],
max_components=15, cv_folds=5):
"""
Higher-level SiPLS selection that tries several parameter combinations.
Parameters:
X: spectral matrix (n_samples, n_wavelengths)
y: concentration / property vector (n_samples,)
n_intervals_list: interval counts to try
n_combinations_list: combination sizes to try
max_components: maximum number of PLS components
cv_folds: number of cross-validation folds
Returns:
best_result: dict describing the best result found
"""
best_overall_rmsecv = float('inf')
best_overall_result = None
print("=== SiPLS 特征选择 ===")
print(f"数据形状: {X.shape}")
print(f"尝试的参数组合: {len(n_intervals_list)} × {len(n_combinations_list)} = {len(n_intervals_list) * len(n_combinations_list)}")
for n_intervals in n_intervals_list:
for n_combinations in n_combinations_list:
print(f"\n--- 测试参数: 区间数={n_intervals}, 组合数={n_combinations} ---")
try:
selected_wavelengths, rmsecv, n_components = synergy_interval_pls(
X, y,
n_intervals=n_intervals,
n_combinations=n_combinations,
max_components=max_components,
cv_folds=cv_folds
)
if rmsecv < best_overall_rmsecv:
best_overall_rmsecv = rmsecv
best_overall_result = {
'selected_wavelengths': selected_wavelengths,
'rmsecv': rmsecv,
'n_components': n_components,
'n_intervals': n_intervals,
'n_combinations': n_combinations,
'selection_ratio': len(selected_wavelengths) / X.shape[1]
}
except Exception as e:
print(f"参数组合 (区间数={n_intervals}, 组合数={n_combinations}) 处理失败: {str(e)}")
continue
if best_overall_result:
print("=== Final best result ===")
print(f"Intervals: {best_overall_result['n_intervals']}")
print(f"Combination size: {best_overall_result['n_combinations']}")
print(f"RMSECV: {best_overall_result['rmsecv']:.6f}")
print(f"Components: {best_overall_result['n_components']}")
print(f"Selected wavelengths: {len(best_overall_result['selected_wavelengths'])}")
print(f"Selection ratio: {best_overall_result['selection_ratio']:.3f}")
return best_overall_result
def plot_sipls_results(X, selected_wavelengths, title="SiPLS Selected Wavelengths"):
"""
Plot the SiPLS selection result.
Parameters:
X: original spectral matrix
selected_wavelengths: selected wavelength indices
title: plot title
"""
n_wavelengths = X.shape[1]
wavelength_indices = np.arange(n_wavelengths)
# selection mask
selection_mask = np.zeros(n_wavelengths, dtype=bool)
selection_mask[selected_wavelengths] = True
plt.figure(figsize=(12, 6))
# mean spectrum
mean_spectrum = np.mean(X, axis=0)
plt.plot(wavelength_indices, mean_spectrum, 'b-', alpha=0.7, label='Mean Spectrum')
# highlight the selected wavelengths
plt.scatter(wavelength_indices[selection_mask], mean_spectrum[selection_mask],
color='red', s=50, alpha=0.8, label='Selected Wavelengths')
plt.xlabel('Wavelength Index')
plt.ylabel('Intensity')
plt.title(title)
plt.legend()
plt.grid(True, alpha=0.3)
return plt.gcf()
# usage example
if __name__ == "__main__":
# generate synthetic spectra
np.random.seed(42)
n_samples = 100
n_wavelengths = 1000
# simulated spectra built from Gaussian peaks
wavelengths = np.linspace(400, 2500, n_wavelengths)
X = np.zeros((n_samples, n_wavelengths))
# add a few characteristic peaks
peak_positions = [500, 800, 1200, 1800, 2200]  # nm
peak_indices = [np.argmin(np.abs(wavelengths - pos)) for pos in peak_positions]
for i in range(n_samples):
for peak_idx in peak_indices:
# add a Gaussian peak
gaussian = np.exp(-0.5 * ((np.arange(n_wavelengths) - peak_idx) / 50)**2)
X[i] += gaussian * np.random.uniform(0.5, 1.5)
# add noise
X[i] += np.random.normal(0, 0.1, n_wavelengths)
# simulated concentrations correlated with some of the peaks
y = (X[:, peak_indices[0]] + X[:, peak_indices[2]] + X[:, peak_indices[4]]) / 3
y += np.random.normal(0, 0.05, n_samples)  # add noise
print("Synthetic data generated")
print(f"Data shape: {X.shape}")
print(f"y range: {y.min():.3f} to {y.max():.3f}")  # the bare print(".3f") was a mangled f-string; this is a plausible reconstruction
# run SiPLS feature selection
result = sipls_feature_selection(
X, y,
n_intervals_list=[10, 15],
n_combinations_list=[2, 3],
max_components=10,
cv_folds=5
)
if result:
print(f"\n选择的波长索引: {result['selected_wavelengths'][:10]}...") # 只显示前10个
# 绘制结果
fig = plot_sipls_results(X, result['selected_wavelengths'])
plt.show()