初始提交

2026-02-25 09:42:51 +08:00
parent c25276c481
commit d84d886f35
182 changed files with 18438 additions and 0 deletions
--- a/classification_model/WaveSelect/centry.py
+++ b/classification_model/WaveSelect/centry.py
@ -0,0 +1,123 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from WaveSelect.Lar import Lar
+from WaveSelect.Spa import SPA
+from WaveSelect.Uve import UVE
+from WaveSelect.Cars import CARS_Cloud
+from WaveSelect.GA import GA
+from WaveSelect.ReliefF import ReliefF
+from sklearn.model_selection import train_test_split
+from collections import Counter
+from tqdm import tqdm
+import os
+
+def IntegratedWaveSelect(methods, X, y, strategy="voting", column_names=None,  name='', result_dir=''):
+    # 检查并确保 X 是 DataFrame
+    if isinstance(X, np.ndarray):
+        if column_names is None:
+            column_names = [f"{i}" for i in range(X.shape[1])]
+        X_df = pd.DataFrame(X, columns=column_names)
+    else:
+        X_df = X
+
+    feature_indices_list = []
+
+    for method in tqdm(methods, desc="Processing Feature Selection Methods"):
+        print(f"Applying method: {method}")  # 调试信息
+        if method == "Cars":
+            save_path = os.path.join(result_dir, f"{name}_cars.png")
+            Featuresecletidx = CARS_Cloud(X, y, N=50, f=20, cv=10, save_path=save_path)
+            Featuresecletidx = Featuresecletidx.astype(int)
+            feature_indices_list.append(Featuresecletidx)
+        elif method == "Lars":
+            Featuresecletidx = Lar(X_df.values, y)
+            feature_indices_list.append(Featuresecletidx)
+        elif method == "Uve":
+            uve = UVE(X_df.values, y, 20)
+            uve.calcCriteria()
+            uve.evalCriteria(cv=5)
+            Featuresecletidx = uve.cutFeature()
+            feature_indices_list.append(Featuresecletidx)
+        elif method == "Spa":
+            save_path = os.path.join(result_dir, f"{name}_spa.png")
+            Xcal, Xval, ycal, yval = train_test_split(X_df, y, test_size=0.2)
+            Featuresecletidx = SPA().spa(
+                Xcal=Xcal.values, ycal=ycal, m_min=4, m_max=32, Xval=Xval.values, yval=yval, autoscaling=1,save_path=save_path)
+            feature_indices_list.append(Featuresecletidx)
+        elif method == "GA":
+            Featuresecletidx = GA(X_df.values, y, 10)
+            feature_indices_list.append(Featuresecletidx)
+        elif method == "ReliefF":
+            relieff = ReliefF(n_neighbors=20, n_features_to_keep=20)
+            Featuresecletidx = relieff.fit(X_df.values, y)
+            feature_indices_list.append(Featuresecletidx)
+        else:
+            print(f"No such method: {method}")
+            continue
+
+        print(f"Selected indices by {method}: {Featuresecletidx}")  # 调试信息
+
+    print("Feature indices list after all methods:", feature_indices_list)  # 调试信息
+
+    if strategy == "voting":
+        if feature_indices_list:
+            all_indices = np.concatenate(feature_indices_list)
+            print("All indices concatenated:", all_indices)  # 调试信息
+            counter = Counter(all_indices)
+            print("Counter result:", counter)  # 调试信息
+            selected_features = [
+                idx for idx, count in tqdm(counter.items(), desc="Voting Selection")
+                if count > len(methods) / 2
+            ]
+            print("Selected features after voting:", selected_features)  # 调试信息
+        else:
+            print("No features selected by any method.")  # 提示信息
+            selected_features = []
+    elif strategy == "weighted":
+        weights = {method: 1 for method in methods}
+        weighted_counts = Counter()
+        for method, indices in zip(methods, feature_indices_list):
+            for idx in indices:
+                weighted_counts[idx] += weights[method]
+        print("Weighted counts:", weighted_counts)  # 调试信息
+        selected_features = [
+            idx for idx, count in tqdm(weighted_counts.items(), desc="Weighted Selection")
+            if count >= np.mean(list(weighted_counts.values()))
+        ]
+        print("Selected features after weighted strategy:", selected_features)  # 调试信息
+    elif strategy == "bagging":
+        num_iterations = 5
+        selected_features = set()
+        for _ in tqdm(range(num_iterations), desc="Bagging Iterations"):
+            X_sample, _, y_sample, _ = train_test_split(X_df, y, test_size=0.5)
+            sub_feature_indices_list = []
+            for method in methods:
+                if method == "Spa":
+                    Xcal, Xval, ycal, yval = train_test_split(X_sample, y_sample, test_size=0.2)
+                    sub_feature_indices = SPA().spa(Xcal=Xcal.values, ycal=ycal, m_min=4, m_max=32, Xval=Xval.values,
+                                                    yval=yval, autoscaling=1)
+                elif method == "Cars":
+                    sub_feature_indices = CARS_Cloud(X_sample.values, y_sample)
+                else:
+                    continue
+                sub_feature_indices_list.append(sub_feature_indices)
+            for sub_indices in sub_feature_indices_list:
+                selected_features.update(sub_indices)
+        selected_features = list(selected_features)
+        print("Selected features after bagging strategy:", selected_features)  # 调试信息
+    else:
+        raise ValueError("Invalid strategy. Choose from 'voting', 'weighted', or 'bagging'.")
+
+    selected_features = list(map(int, selected_features))  # 确保索引是整数类型
+    X_Feature = X_df.iloc[:, selected_features]
+    selected_columns = X_df.columns[selected_features]
+
+
+
+
+
+    # 关闭图像以释放资源
+    plt.close()
+
+    return X_Feature, y, selected_columns