Files
micro_plastic/classification_model/WaveSelect/centry.py
2026-02-25 09:42:51 +08:00

124 lines
5.4 KiB
Python

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from WaveSelect.Lar import Lar
from WaveSelect.Spa import SPA
from WaveSelect.Uve import UVE
from WaveSelect.Cars import CARS_Cloud
from WaveSelect.GA import GA
from WaveSelect.ReliefF import ReliefF
from sklearn.model_selection import train_test_split
from collections import Counter
from tqdm import tqdm
import os
# Names of the selectors that the full-data pass knows how to run.
_WAVE_SELECT_METHODS = ("Cars", "Lars", "Uve", "Spa", "GA", "ReliefF")


def _run_wave_select_method(method, X_df, y, name, result_dir):
    """Run one named wavelength selector on the full data set.

    Args:
        method: One of the names in ``_WAVE_SELECT_METHODS``.
        X_df: Feature matrix as a pandas DataFrame.
        y: Target values, forwarded unchanged to the selector.
        name: Prefix used for the file names handed to "Cars" and "Spa".
        result_dir: Directory part of those file names.

    Returns:
        Whatever the underlying selector returns; the caller treats it as an
        iterable of selected column positions.

    Raises:
        ValueError: If ``method`` is not a known selector name.
    """
    X_values = X_df.values
    if method == "Cars":
        save_path = os.path.join(result_dir, f"{name}_cars.png")
        # Hand CARS the plain array, exactly like every other selector and
        # like the bagging branch, instead of whatever type the caller passed.
        indices = CARS_Cloud(X_values, y, N=50, f=20, cv=10, save_path=save_path)
        return indices.astype(int)
    if method == "Lars":
        return Lar(X_values, y)
    if method == "Uve":
        uve = UVE(X_values, y, 20)
        uve.calcCriteria()
        uve.evalCriteria(cv=5)
        return uve.cutFeature()
    if method == "Spa":
        save_path = os.path.join(result_dir, f"{name}_spa.png")
        # SPA needs its own calibration/validation split (80/20, unseeded).
        Xcal, Xval, ycal, yval = train_test_split(X_df, y, test_size=0.2)
        return SPA().spa(
            Xcal=Xcal.values, ycal=ycal, m_min=4, m_max=32,
            Xval=Xval.values, yval=yval, autoscaling=1, save_path=save_path)
    if method == "GA":
        return GA(X_values, y, 10)
    if method == "ReliefF":
        relieff = ReliefF(n_neighbors=20, n_features_to_keep=20)
        # NOTE(review): the result of fit() is used as the index list here;
        # confirm that this ReliefF.fit really returns the selected indices.
        return relieff.fit(X_values, y)
    raise ValueError(f"No such method: {method}")


def IntegratedWaveSelect(methods, X, y, strategy="voting", column_names=None, name='', result_dir=''):
    """Combine several wavelength-selection methods into one feature subset.

    Args:
        methods: Sequence of selector names. Recognised names are "Cars",
            "Lars", "Uve", "Spa", "GA" and "ReliefF"; any other name is
            reported and skipped.
        X: Feature matrix. A DataFrame is used as is; anything else is
            converted with ``np.asarray`` and wrapped in a DataFrame.
        y: Target values, forwarded to the selectors and returned unchanged.
        strategy: How the per-method selections are merged:
            * "voting"   - keep features chosen by more than half of the
              methods that actually ran.
            * "weighted" - every method currently has weight 1; keep features
              whose weighted count is at least the mean weighted count.
            * "bagging"  - five rounds, each on a random 50% subsample, running
              only "Spa" and "Cars"; the union of all selections is kept.
        column_names: Column labels used when ``X`` is not already a
            DataFrame. Defaults to "0", "1", ... .
        name: Prefix of the file names passed to "Cars" and "Spa".
        result_dir: Directory of the file names passed to "Cars" and "Spa".

    Returns:
        Tuple ``(X_Feature, y, selected_columns)``: the DataFrame restricted
        to the selected columns, the untouched ``y`` and the labels of the
        selected columns.

    Raises:
        ValueError: If ``strategy`` is not one of the three supported values.
            This is checked before any selector is run.
    """
    # Fail fast: do not run the (potentially slow) selectors only to discover
    # afterwards that the merge strategy is unusable.
    if strategy not in ("voting", "weighted", "bagging"):
        raise ValueError("Invalid strategy. Choose from 'voting', 'weighted', or 'bagging'.")

    # Make sure X is a DataFrame so that .values / .iloc / .columns all work.
    if isinstance(X, pd.DataFrame):
        X_df = X
    else:
        X_array = np.asarray(X)
        if column_names is None:
            column_names = [f"{i}" for i in range(X_array.shape[1])]
        X_df = pd.DataFrame(X_array, columns=column_names)

    # Full-data pass. ran_methods stays aligned with feature_indices_list even
    # when unknown names are skipped.
    # NOTE(review): the "bagging" strategy does not use these results; the
    # pass is kept so that its existing side effects (debug output and
    # whatever "Cars"/"Spa" do with save_path) are unchanged.
    feature_indices_list = []
    ran_methods = []
    for method in tqdm(methods, desc="Processing Feature Selection Methods"):
        print(f"Applying method: {method}")  # debug output
        if method not in _WAVE_SELECT_METHODS:
            print(f"No such method: {method}")
            continue
        method_indices = _run_wave_select_method(method, X_df, y, name, result_dir)
        feature_indices_list.append(method_indices)
        ran_methods.append(method)
        print(f"Selected indices by {method}: {method_indices}")  # debug output
    print("Feature indices list after all methods:", feature_indices_list)  # debug output

    if strategy == "voting":
        if feature_indices_list:
            all_indices = np.concatenate(feature_indices_list)
            print("All indices concatenated:", all_indices)  # debug output
            counter = Counter(all_indices)
            print("Counter result:", counter)  # debug output
            # Majority is taken over the methods that actually voted, so a
            # skipped (unknown) name cannot make a majority unreachable.
            majority = len(feature_indices_list) / 2
            selected_features = [
                idx for idx, count in tqdm(counter.items(), desc="Voting Selection")
                if count > majority
            ]
            print("Selected features after voting:", selected_features)  # debug output
        else:
            print("No features selected by any method.")  # notice
            selected_features = []
    elif strategy == "weighted":
        weights = {method: 1 for method in ran_methods}
        weighted_counts = Counter()
        for method, indices in zip(ran_methods, feature_indices_list):
            for idx in indices:
                weighted_counts[idx] += weights[method]
        print("Weighted counts:", weighted_counts)  # debug output
        if weighted_counts:
            # Compute the threshold once instead of once per candidate.
            threshold = np.mean(list(weighted_counts.values()))
            selected_features = [
                idx for idx, count in tqdm(weighted_counts.items(), desc="Weighted Selection")
                if count >= threshold
            ]
        else:
            selected_features = []
        print("Selected features after weighted strategy:", selected_features)  # debug output
    else:  # strategy == "bagging" (validated above)
        num_iterations = 5
        bagged_features = set()
        for _round in tqdm(range(num_iterations), desc="Bagging Iterations"):
            # Each round works on a fresh random half of the samples.
            X_sample, _, y_sample, _ = train_test_split(X_df, y, test_size=0.5)
            for method in methods:
                if method == "Spa":
                    Xcal, Xval, ycal, yval = train_test_split(X_sample, y_sample, test_size=0.2)
                    sub_feature_indices = SPA().spa(
                        Xcal=Xcal.values, ycal=ycal, m_min=4, m_max=32,
                        Xval=Xval.values, yval=yval, autoscaling=1)
                elif method == "Cars":
                    sub_feature_indices = CARS_Cloud(X_sample.values, y_sample)
                else:
                    # Only "Spa" and "Cars" take part in bagging.
                    continue
                bagged_features.update(sub_feature_indices)
        selected_features = list(bagged_features)
        print("Selected features after bagging strategy:", selected_features)  # debug output

    # Positional indexing below requires plain Python ints.
    selected_features = list(map(int, selected_features))
    X_Feature = X_df.iloc[:, selected_features]
    selected_columns = X_df.columns[selected_features]
    # Close the current figure to release its resources.
    plt.close()
    return X_Feature, y, selected_columns