import os
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from WaveSelect.Cars import CARS_Cloud
from WaveSelect.GA import GA
from WaveSelect.Lar import Lar
from WaveSelect.ReliefF import ReliefF
from WaveSelect.Spa import SPA
from WaveSelect.Uve import UVE

# Method names understood by the per-method dispatch below.
_KNOWN_METHODS = ("Cars", "Lars", "Uve", "Spa", "GA", "ReliefF")

# Combination strategies accepted by IntegratedWaveSelect.
_VALID_STRATEGIES = ("voting", "weighted", "bagging")


def _run_method(method, X_df, y, name, result_dir):
    """Run one named feature-selection method and return its selected indices.

    Args:
        method: One of the names in ``_KNOWN_METHODS``.
        X_df: Feature matrix as a DataFrame; methods receive ``X_df.values``.
        y: Target values, passed through unchanged to the selection method.
        name: Prefix used for the plot file names written by Cars and Spa.
        result_dir: Directory joined with the plot file name.

    Returns:
        Whatever index collection the underlying WaveSelect routine returns
        (presumably column positions -- confirm against the WaveSelect package).

    Raises:
        ValueError: If ``method`` is not a known method name.
    """
    values = X_df.values

    if method == "Cars":
        save_path = os.path.join(result_dir, f"{name}_cars.png")
        # Pass the ndarray view (not the caller's raw X) so Cars receives the
        # same input type as every other method and as the bagging branch.
        indices = CARS_Cloud(values, y, N=50, f=20, cv=10, save_path=save_path)
        return indices.astype(int)

    if method == "Lars":
        return Lar(values, y)

    if method == "Uve":
        uve = UVE(values, y, 20)
        uve.calcCriteria()
        uve.evalCriteria(cv=5)
        return uve.cutFeature()

    if method == "Spa":
        save_path = os.path.join(result_dir, f"{name}_spa.png")
        Xcal, Xval, ycal, yval = train_test_split(X_df, y, test_size=0.2)
        return SPA().spa(
            Xcal=Xcal.values, ycal=ycal, m_min=4, m_max=32,
            Xval=Xval.values, yval=yval, autoscaling=1, save_path=save_path)

    if method == "GA":
        return GA(values, y, 10)

    if method == "ReliefF":
        relieff = ReliefF(n_neighbors=20, n_features_to_keep=20)
        return relieff.fit(values, y)

    raise ValueError(f"No such method: {method}")


def IntegratedWaveSelect(methods, X, y, strategy="voting", column_names=None, name='', result_dir=''):
    """Combine several feature-selection methods into one selected column set.

    Every recognised method in ``methods`` is run on the full data; the
    per-method results are then combined according to ``strategy``:

    * ``"voting"``   -- keep an index selected by more than half of the
      methods that actually ran.
    * ``"weighted"`` -- each method contributes weight 1 per selected index;
      keep indices whose total is at least the mean total.
    * ``"bagging"``  -- for 5 random half-samples, re-run Spa and/or Cars
      (other methods are ignored in this strategy) and take the union of all
      indices they return.

    Args:
        methods: Sequence of method names ("Cars", "Lars", "Uve", "Spa",
            "GA", "ReliefF"). Unknown names are reported and skipped.
        X: Feature matrix, either a ``numpy.ndarray`` or an object that is
            used directly as a DataFrame.
        y: Target values; returned unchanged.
        strategy: One of "voting", "weighted" or "bagging".
        column_names: Column labels used only when ``X`` is an ndarray;
            defaults to the stringified column positions.
        name: Prefix for plot file names written by Cars and Spa.
        result_dir: Directory in which those plots are saved.

    Returns:
        Tuple ``(X_Feature, y, selected_columns)``: the DataFrame restricted
        to the selected columns, the original ``y``, and the labels of the
        selected columns.

    Raises:
        ValueError: If ``strategy`` is not one of the supported values. This
            is checked before any selection method is run.
    """
    # Fail fast: do not spend time on the selection methods when the
    # requested strategy cannot be honoured anyway.
    if strategy not in _VALID_STRATEGIES:
        raise ValueError("Invalid strategy. Choose from 'voting', 'weighted', or 'bagging'.")

    # Make sure we work with a DataFrame.
    if isinstance(X, np.ndarray):
        if column_names is None:
            column_names = [f"{i}" for i in range(X.shape[1])]
        X_df = pd.DataFrame(X, columns=column_names)
    else:
        X_df = X

    # applied_methods[i] is the method that produced feature_indices_list[i];
    # skipped (unknown) methods appear in neither list, keeping them aligned.
    applied_methods = []
    feature_indices_list = []
    for method in tqdm(methods, desc="Processing Feature Selection Methods"):
        print(f"Applying method: {method}")  # debug output
        if method not in _KNOWN_METHODS:
            print(f"No such method: {method}")
            continue
        indices = _run_method(method, X_df, y, name, result_dir)
        applied_methods.append(method)
        feature_indices_list.append(indices)
        print(f"Selected indices by {method}: {indices}")  # debug output

    print("Feature indices list after all methods:", feature_indices_list)  # debug output

    if strategy == "voting":
        if feature_indices_list:
            all_indices = np.concatenate(feature_indices_list)
            print("All indices concatenated:", all_indices)  # debug output
            counter = Counter(all_indices)
            print("Counter result:", counter)  # debug output
            # Majority of the methods that actually voted, so that skipped
            # method names do not inflate the threshold.
            majority = len(applied_methods) / 2
            selected_features = [
                idx
                for idx, count in tqdm(counter.items(), desc="Voting Selection")
                if count > majority
            ]
            print("Selected features after voting:", selected_features)  # debug output
        else:
            print("No features selected by any method.")  # informational
            selected_features = []

    elif strategy == "weighted":
        weights = {method: 1 for method in applied_methods}
        weighted_counts = Counter()
        for method, indices in zip(applied_methods, feature_indices_list):
            for idx in indices:
                weighted_counts[idx] += weights[method]
        print("Weighted counts:", weighted_counts)  # debug output
        # Loop-invariant threshold, computed once; guarded so an empty result
        # does not trigger numpy's mean-of-empty warning.
        threshold = np.mean(list(weighted_counts.values())) if weighted_counts else 0
        selected_features = [
            idx
            for idx, count in tqdm(weighted_counts.items(), desc="Weighted Selection")
            if count >= threshold
        ]
        print("Selected features after weighted strategy:", selected_features)  # debug output

    else:  # strategy == "bagging" (validated above)
        # NOTE(review): the full-data results gathered above are not used by
        # this strategy; they are still computed to preserve the original
        # side effects (plots, console output).
        num_iterations = 5
        bagged = set()
        for _iteration in tqdm(range(num_iterations), desc="Bagging Iterations"):
            X_sample, _X_rest, y_sample, _y_rest = train_test_split(X_df, y, test_size=0.5)
            for method in methods:
                if method == "Spa":
                    Xcal, Xval, ycal, yval = train_test_split(X_sample, y_sample, test_size=0.2)
                    sub_feature_indices = SPA().spa(
                        Xcal=Xcal.values, ycal=ycal, m_min=4, m_max=32,
                        Xval=Xval.values, yval=yval, autoscaling=1)
                elif method == "Cars":
                    sub_feature_indices = CARS_Cloud(X_sample.values, y_sample)
                else:
                    # Only Spa and Cars take part in bagging.
                    continue
                bagged.update(sub_feature_indices)
        selected_features = list(bagged)
        print("Selected features after bagging strategy:", selected_features)  # debug output

    # Positional indexing below requires plain Python integers.
    selected_features = list(map(int, selected_features))
    X_Feature = X_df.iloc[:, selected_features]
    selected_columns = X_df.columns[selected_features]

    # Close the current figure to release plotting resources.
    plt.close()

    return X_Feature, y, selected_columns