初始提交
This commit is contained in:
123
classification_model/WaveSelect/centry.py
Normal file
123
classification_model/WaveSelect/centry.py
Normal file
@ -0,0 +1,123 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from WaveSelect.Lar import Lar
|
||||
from WaveSelect.Spa import SPA
|
||||
from WaveSelect.Uve import UVE
|
||||
from WaveSelect.Cars import CARS_Cloud
|
||||
from WaveSelect.GA import GA
|
||||
from WaveSelect.ReliefF import ReliefF
|
||||
from sklearn.model_selection import train_test_split
|
||||
from collections import Counter
|
||||
from tqdm import tqdm
|
||||
import os
|
||||
|
||||
def IntegratedWaveSelect(methods, X, y, strategy="voting", column_names=None, name='', result_dir=''):
    """Run several feature (wavelength) selectors and combine their selections.

    Each name in ``methods`` is dispatched to the matching selector
    ("Cars", "Lars", "Uve", "Spa", "GA", "ReliefF"). Unknown names are
    reported with a message and skipped. The per-method index lists are then
    merged according to ``strategy``.

    Args:
        methods: Sequence of selector names to apply.
        X: Feature matrix. A ``pandas.DataFrame`` is used as is; anything
            else is converted to a 2-D array and wrapped in a DataFrame.
        y: Target values, forwarded unchanged to every selector and returned
            unchanged.
        strategy: How to combine the per-method selections:
            ``"voting"`` keeps indices chosen by more than half of the
            requested methods; ``"weighted"`` keeps indices whose weighted
            count (all weights are currently 1) is at least the mean count;
            ``"bagging"`` takes the union of the "Spa" and "Cars" selections
            over 5 random half-size subsamples.
        column_names: Column labels used only when ``X`` is not already a
            DataFrame. Defaults to ``"0"``, ``"1"``, ... .
        name: Prefix for the plot file names handed to "Cars" and "Spa".
        result_dir: Directory joined with the plot file names.

    Returns:
        A tuple ``(X_Feature, y, selected_columns)`` where ``X_Feature`` is
        the DataFrame restricted to the selected columns and
        ``selected_columns`` holds the corresponding column labels.

    Raises:
        ValueError: If ``strategy`` is not one of the supported names.
    """
    # Fail fast: reject a bad strategy before running any selector, so no
    # work (or plot output) is wasted on a call that is going to raise anyway.
    if strategy not in ("voting", "weighted", "bagging"):
        raise ValueError("Invalid strategy. Choose from 'voting', 'weighted', or 'bagging'.")

    # Make sure X is available as a DataFrame.
    if isinstance(X, pd.DataFrame):
        X_df = X
    else:
        X_array = np.asarray(X)
        if column_names is None:
            column_names = [f"{i}" for i in range(X_array.shape[1])]
        X_df = pd.DataFrame(X_array, columns=column_names)

    feature_indices_list = []
    # Names of the methods that actually produced a result, kept parallel to
    # feature_indices_list so skipped (unknown) methods cannot shift pairing.
    applied_methods = []

    for method in tqdm(methods, desc="Processing Feature Selection Methods"):
        print(f"Applying method: {method}")  # debug output
        if method == "Cars":
            save_path = os.path.join(result_dir, f"{name}_cars.png")
            # Pass the ndarray like every other selector (and like the
            # bagging branch below) instead of the raw, possibly-DataFrame X.
            selected_idx = CARS_Cloud(X_df.values, y, N=50, f=20, cv=10, save_path=save_path)
            selected_idx = np.asarray(selected_idx).astype(int)
        elif method == "Lars":
            selected_idx = Lar(X_df.values, y)
        elif method == "Uve":
            uve = UVE(X_df.values, y, 20)
            uve.calcCriteria()
            uve.evalCriteria(cv=5)
            selected_idx = uve.cutFeature()
        elif method == "Spa":
            save_path = os.path.join(result_dir, f"{name}_spa.png")
            Xcal, Xval, ycal, yval = train_test_split(X_df, y, test_size=0.2)
            selected_idx = SPA().spa(
                Xcal=Xcal.values, ycal=ycal, m_min=4, m_max=32,
                Xval=Xval.values, yval=yval, autoscaling=1, save_path=save_path)
        elif method == "GA":
            selected_idx = GA(X_df.values, y, 10)
        elif method == "ReliefF":
            relieff = ReliefF(n_neighbors=20, n_features_to_keep=20)
            selected_idx = relieff.fit(X_df.values, y)
        else:
            print(f"No such method: {method}")
            continue

        # NOTE(review): each selector presumably returns a 1-D sequence of
        # integer column positions - confirm against the WaveSelect modules.
        feature_indices_list.append(selected_idx)
        applied_methods.append(method)
        print(f"Selected indices by {method}: {selected_idx}")  # debug output

    print("Feature indices list after all methods:", feature_indices_list)  # debug output

    if strategy == "voting":
        if feature_indices_list:
            all_indices = np.concatenate(feature_indices_list)
            print("All indices concatenated:", all_indices)  # debug output
            counter = Counter(all_indices)
            print("Counter result:", counter)  # debug output
            # Majority is measured against the number of *requested* methods,
            # including any that were skipped as unknown.
            majority = len(methods) / 2
            selected_features = [
                idx for idx, count in tqdm(counter.items(), desc="Voting Selection")
                if count > majority
            ]
            print("Selected features after voting:", selected_features)  # debug output
        else:
            print("No features selected by any method.")  # informational message
            selected_features = []
    elif strategy == "weighted":
        # Every method currently carries the same weight.
        weights = {method: 1 for method in methods}
        weighted_counts = Counter()
        for method, indices in zip(applied_methods, feature_indices_list):
            for idx in indices:
                weighted_counts[idx] += weights[method]
        print("Weighted counts:", weighted_counts)  # debug output
        # Compute the cut-off once; skip np.mean on an empty Counter, where
        # it would only emit a warning and nothing can be selected anyway.
        mean_count = np.mean(list(weighted_counts.values())) if weighted_counts else 0
        selected_features = [
            idx for idx, count in tqdm(weighted_counts.items(), desc="Weighted Selection")
            if count >= mean_count
        ]
        print("Selected features after weighted strategy:", selected_features)  # debug output
    else:  # strategy == "bagging" (validated at the top of the function)
        # Only "Spa" and "Cars" take part in bagging; the full-data results
        # collected above are not used for this strategy.
        num_iterations = 5
        selected_features = set()
        for _ in tqdm(range(num_iterations), desc="Bagging Iterations"):
            X_sample, _, y_sample, _ = train_test_split(X_df, y, test_size=0.5)
            for method in methods:
                if method == "Spa":
                    Xcal, Xval, ycal, yval = train_test_split(X_sample, y_sample, test_size=0.2)
                    sub_feature_indices = SPA().spa(
                        Xcal=Xcal.values, ycal=ycal, m_min=4, m_max=32,
                        Xval=Xval.values, yval=yval, autoscaling=1)
                elif method == "Cars":
                    sub_feature_indices = CARS_Cloud(X_sample.values, y_sample)
                else:
                    continue
                selected_features.update(sub_feature_indices)
        selected_features = list(selected_features)
        print("Selected features after bagging strategy:", selected_features)  # debug output

    # Make sure the indices are plain Python ints before positional indexing.
    selected_features = list(map(int, selected_features))
    X_Feature = X_df.iloc[:, selected_features]
    selected_columns = X_df.columns[selected_features]

    # Close the current matplotlib figure to release resources.
    plt.close()

    return X_Feature, y, selected_columns
|
||||
Reference in New Issue
Block a user