Files
HSI/Feature_Selection_method/random_fog.py

292 lines
9.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.base import clone
import copy
class ShuffledFrogLeaping:
"""
随机蛙跳算法 (Shuffled Frog Leaping Algorithm, SFLA) 进行特征选择
算法原理:
1. 将青蛙种群分成多个小组memeplexes
2. 在每个小组内进行局部搜索和进化
3. 定期重组所有青蛙,进行全局信息交换
4. 重复直到满足停止条件
"""
def __init__(self, n_frogs=50, n_memeplexes=5, n_evolution_steps=10,
n_shuffle_iterations=10, classifier=None, cv=5):
"""
初始化随机蛙跳算法参数
参数:
n_frogs: 青蛙种群大小
n_memeplexes: 小组数量
n_evolution_steps: 每个小组的进化步数
n_shuffle_iterations: 重组迭代次数
classifier: 用于评估特征子集的分类器
cv: 交叉验证折数
"""
self.n_frogs = n_frogs
self.n_memeplexes = n_memeplexes
self.n_evolution_steps = n_evolution_steps
self.n_shuffle_iterations = n_shuffle_iterations
self.classifier = classifier or RandomForestClassifier(random_state=42, n_estimators=50)
self.cv = cv
# 算法内部变量
self.n_features = None
self.frogs = None # 青蛙种群,每个青蛙是一个二进制向量
self.fitness_values = None
self.best_frog = None
self.best_fitness = -np.inf
self.selected_features = None
def _initialize_population(self):
"""初始化青蛙种群"""
self.frogs = []
for _ in range(self.n_frogs):
# 随机初始化二进制向量1表示选择该特征0表示不选择
frog = np.random.randint(0, 2, self.n_features)
self.frogs.append(frog)
self.frogs = np.array(self.frogs)
def _evaluate_fitness(self, X, y):
"""评估所有青蛙的适应度"""
self.fitness_values = []
for frog in self.frogs:
fitness = self._calculate_fitness(frog, X, y)
self.fitness_values.append(fitness)
# 更新全局最优
if fitness > self.best_fitness:
self.best_fitness = fitness
self.best_frog = frog.copy()
self.fitness_values = np.array(self.fitness_values)
def _calculate_fitness(self, frog, X, y):
"""计算单个青蛙的适应度"""
selected_features = np.where(frog == 1)[0]
# 如果没有选择任何特征,返回最低适应度
if len(selected_features) == 0:
return 0.0
# 使用选择的特征进行交叉验证
X_selected = X[:, selected_features]
try:
scores = cross_val_score(clone(self.classifier), X_selected, y, cv=self.cv)
return np.mean(scores)
except:
# 如果交叉验证失败,返回低适应度
return 0.0
def _divide_into_memeplexes(self):
"""将青蛙按适应度排序并分成小组"""
# 按适应度降序排序
sorted_indices = np.argsort(self.fitness_values)[::-1]
self.frogs = self.frogs[sorted_indices]
self.fitness_values = self.fitness_values[sorted_indices]
# 分成小组
memeplexes = []
frogs_per_memeplex = self.n_frogs // self.n_memeplexes
for i in range(self.n_memeplexes):
start_idx = i * frogs_per_memeplex
if i == self.n_memeplexes - 1:
# 最后一个小组包含剩余的所有青蛙
end_idx = self.n_frogs
else:
end_idx = (i + 1) * frogs_per_memeplex
memeplex = {
'frogs': self.frogs[start_idx:end_idx].copy(),
'fitness': self.fitness_values[start_idx:end_idx].copy()
}
memeplexes.append(memeplex)
return memeplexes
def _evolve_memeplex(self, memeplex, X, y):
"""进化单个小组"""
frogs = memeplex['frogs']
fitness = memeplex['fitness']
# 找出小组中的最好和最坏青蛙
best_idx = np.argmax(fitness)
worst_idx = np.argmin(fitness)
best_frog = frogs[best_idx]
worst_frog = frogs[worst_idx]
# 对最坏的青蛙进行进化
for step in range(self.n_evolution_steps):
# 生成新的青蛙: worst_frog + rand() * (best_frog - worst_frog)
rand = np.random.random(self.n_features)
new_frog = worst_frog + rand * (best_frog - worst_frog)
# 二进制化大于0.5的为1否则为0
new_frog = (new_frog > 0.5).astype(int)
# 确保至少选择一个特征
if np.sum(new_frog) == 0:
new_frog[np.random.randint(self.n_features)] = 1
# 计算新青蛙的适应度
new_fitness = self._calculate_fitness(new_frog, X, y)
# 如果新青蛙更好,替换最坏的青蛙
if new_fitness > fitness[worst_idx]:
frogs[worst_idx] = new_frog
fitness[worst_idx] = new_fitness
# 更新小组内的最好青蛙
if new_fitness > fitness[best_idx]:
best_idx = worst_idx
best_frog = new_frog
# 重新找出最坏的青蛙
worst_idx = np.argmin(fitness)
worst_frog = frogs[worst_idx]
else:
# 如果没有改善,随机生成一个新青蛙
new_frog = np.random.randint(0, 2, self.n_features)
if np.sum(new_frog) == 0:
new_frog[np.random.randint(self.n_features)] = 1
new_fitness = self._calculate_fitness(new_frog, X, y)
if new_fitness > fitness[worst_idx]:
frogs[worst_idx] = new_frog
fitness[worst_idx] = new_fitness
return frogs, fitness
def fit(self, X, y):
"""
运行随机蛙跳算法进行特征选择
参数:
X: 特征矩阵 (n_samples, n_features)
y: 标签向量 (n_samples,)
返回:
selected_features: 选择的特征索引列表
"""
self.n_features = X.shape[1]
# 初始化种群
self._initialize_population()
# 初始评估
self._evaluate_fitness(X, y)
# 主循环
for iteration in range(self.n_shuffle_iterations):
# 将青蛙分成小组
memeplexes = self._divide_into_memeplexes()
# 进化每个小组
evolved_frogs = []
evolved_fitness = []
for memeplex in memeplexes:
evolved_frog, evolved_fit = self._evolve_memeplex(memeplex, X, y)
evolved_frogs.extend(evolved_frog)
evolved_fitness.extend(evolved_fit)
# 更新种群
self.frogs = np.array(evolved_frogs)
self.fitness_values = np.array(evolved_fitness)
# 再次评估所有青蛙(确保一致性)
self._evaluate_fitness(X, y)
# 返回最优解
self.selected_features = np.where(self.best_frog == 1)[0]
return self.selected_features.tolist()
def get_feature_importance(self):
"""获取特征选择结果的统计信息"""
if self.selected_features is None:
raise ValueError("请先运行 fit 方法")
n_selected = len(self.selected_features)
selection_ratio = n_selected / self.n_features
return {
'selected_features': self.selected_features,
'n_selected': n_selected,
'n_total': self.n_features,
'selection_ratio': selection_ratio,
'best_fitness': self.best_fitness
}
def shuffled_frog_leaping_selection(X, y, n_frogs=50, n_memeplexes=5,
n_evolution_steps=10, n_shuffle_iterations=10,
classifier=None, cv=5):
"""
使用随机蛙跳算法进行特征选择
参数:
X: 特征矩阵 (n_samples, n_features)
y: 标签向量 (n_samples,)
n_frogs: 青蛙种群大小
n_memeplexes: 小组数量
n_evolution_steps: 每个小组的进化步数
n_shuffle_iterations: 重组迭代次数
classifier: 用于评估特征子集的分类器
cv: 交叉验证折数
返回:
selected_features: 选择的特征索引列表
"""
sfla = ShuffledFrogLeaping(
n_frogs=n_frogs,
n_memeplexes=n_memeplexes,
n_evolution_steps=n_evolution_steps,
n_shuffle_iterations=n_shuffle_iterations,
classifier=classifier,
cv=cv
)
return sfla.fit(X, y)
# 使用示例
if __name__ == "__main__":
# 生成示例数据
from sklearn.datasets import make_classification
X, y = make_classification(
n_samples=200,
n_features=50,
n_informative=10,
n_redundant=10,
n_clusters_per_class=1,
random_state=42
)
print("原始特征数量:", X.shape[1])
# 使用随机蛙跳算法进行特征选择
selected_features = shuffled_frog_leaping_selection(
X, y,
n_frogs=30,
n_memeplexes=3,
n_evolution_steps=5,
n_shuffle_iterations=5
)
print("选择的特征数量:", len(selected_features))
print("选择的特征索引:", selected_features)
# 计算选择率
selection_ratio = len(selected_features) / X.shape[1]
print(".2f")