增加模块;增加主调用命令

This commit is contained in:
2026-01-07 16:36:47 +08:00
commit 2d4b170a45
109 changed files with 55763 additions and 0 deletions

View File

@ -0,0 +1,292 @@
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.base import clone
import copy
class ShuffledFrogLeaping:
"""
随机蛙跳算法 (Shuffled Frog Leaping Algorithm, SFLA) 进行特征选择
算法原理:
1. 将青蛙种群分成多个小组memeplexes
2. 在每个小组内进行局部搜索和进化
3. 定期重组所有青蛙,进行全局信息交换
4. 重复直到满足停止条件
"""
def __init__(self, n_frogs=50, n_memeplexes=5, n_evolution_steps=10,
n_shuffle_iterations=10, classifier=None, cv=5):
"""
初始化随机蛙跳算法参数
参数:
n_frogs: 青蛙种群大小
n_memeplexes: 小组数量
n_evolution_steps: 每个小组的进化步数
n_shuffle_iterations: 重组迭代次数
classifier: 用于评估特征子集的分类器
cv: 交叉验证折数
"""
self.n_frogs = n_frogs
self.n_memeplexes = n_memeplexes
self.n_evolution_steps = n_evolution_steps
self.n_shuffle_iterations = n_shuffle_iterations
self.classifier = classifier or RandomForestClassifier(random_state=42, n_estimators=50)
self.cv = cv
# 算法内部变量
self.n_features = None
self.frogs = None # 青蛙种群,每个青蛙是一个二进制向量
self.fitness_values = None
self.best_frog = None
self.best_fitness = -np.inf
self.selected_features = None
def _initialize_population(self):
"""初始化青蛙种群"""
self.frogs = []
for _ in range(self.n_frogs):
# 随机初始化二进制向量1表示选择该特征0表示不选择
frog = np.random.randint(0, 2, self.n_features)
self.frogs.append(frog)
self.frogs = np.array(self.frogs)
def _evaluate_fitness(self, X, y):
"""评估所有青蛙的适应度"""
self.fitness_values = []
for frog in self.frogs:
fitness = self._calculate_fitness(frog, X, y)
self.fitness_values.append(fitness)
# 更新全局最优
if fitness > self.best_fitness:
self.best_fitness = fitness
self.best_frog = frog.copy()
self.fitness_values = np.array(self.fitness_values)
def _calculate_fitness(self, frog, X, y):
"""计算单个青蛙的适应度"""
selected_features = np.where(frog == 1)[0]
# 如果没有选择任何特征,返回最低适应度
if len(selected_features) == 0:
return 0.0
# 使用选择的特征进行交叉验证
X_selected = X[:, selected_features]
try:
scores = cross_val_score(clone(self.classifier), X_selected, y, cv=self.cv)
return np.mean(scores)
except:
# 如果交叉验证失败,返回低适应度
return 0.0
def _divide_into_memeplexes(self):
"""将青蛙按适应度排序并分成小组"""
# 按适应度降序排序
sorted_indices = np.argsort(self.fitness_values)[::-1]
self.frogs = self.frogs[sorted_indices]
self.fitness_values = self.fitness_values[sorted_indices]
# 分成小组
memeplexes = []
frogs_per_memeplex = self.n_frogs // self.n_memeplexes
for i in range(self.n_memeplexes):
start_idx = i * frogs_per_memeplex
if i == self.n_memeplexes - 1:
# 最后一个小组包含剩余的所有青蛙
end_idx = self.n_frogs
else:
end_idx = (i + 1) * frogs_per_memeplex
memeplex = {
'frogs': self.frogs[start_idx:end_idx].copy(),
'fitness': self.fitness_values[start_idx:end_idx].copy()
}
memeplexes.append(memeplex)
return memeplexes
def _evolve_memeplex(self, memeplex, X, y):
"""进化单个小组"""
frogs = memeplex['frogs']
fitness = memeplex['fitness']
# 找出小组中的最好和最坏青蛙
best_idx = np.argmax(fitness)
worst_idx = np.argmin(fitness)
best_frog = frogs[best_idx]
worst_frog = frogs[worst_idx]
# 对最坏的青蛙进行进化
for step in range(self.n_evolution_steps):
# 生成新的青蛙: worst_frog + rand() * (best_frog - worst_frog)
rand = np.random.random(self.n_features)
new_frog = worst_frog + rand * (best_frog - worst_frog)
# 二进制化大于0.5的为1否则为0
new_frog = (new_frog > 0.5).astype(int)
# 确保至少选择一个特征
if np.sum(new_frog) == 0:
new_frog[np.random.randint(self.n_features)] = 1
# 计算新青蛙的适应度
new_fitness = self._calculate_fitness(new_frog, X, y)
# 如果新青蛙更好,替换最坏的青蛙
if new_fitness > fitness[worst_idx]:
frogs[worst_idx] = new_frog
fitness[worst_idx] = new_fitness
# 更新小组内的最好青蛙
if new_fitness > fitness[best_idx]:
best_idx = worst_idx
best_frog = new_frog
# 重新找出最坏的青蛙
worst_idx = np.argmin(fitness)
worst_frog = frogs[worst_idx]
else:
# 如果没有改善,随机生成一个新青蛙
new_frog = np.random.randint(0, 2, self.n_features)
if np.sum(new_frog) == 0:
new_frog[np.random.randint(self.n_features)] = 1
new_fitness = self._calculate_fitness(new_frog, X, y)
if new_fitness > fitness[worst_idx]:
frogs[worst_idx] = new_frog
fitness[worst_idx] = new_fitness
return frogs, fitness
def fit(self, X, y):
"""
运行随机蛙跳算法进行特征选择
参数:
X: 特征矩阵 (n_samples, n_features)
y: 标签向量 (n_samples,)
返回:
selected_features: 选择的特征索引列表
"""
self.n_features = X.shape[1]
# 初始化种群
self._initialize_population()
# 初始评估
self._evaluate_fitness(X, y)
# 主循环
for iteration in range(self.n_shuffle_iterations):
# 将青蛙分成小组
memeplexes = self._divide_into_memeplexes()
# 进化每个小组
evolved_frogs = []
evolved_fitness = []
for memeplex in memeplexes:
evolved_frog, evolved_fit = self._evolve_memeplex(memeplex, X, y)
evolved_frogs.extend(evolved_frog)
evolved_fitness.extend(evolved_fit)
# 更新种群
self.frogs = np.array(evolved_frogs)
self.fitness_values = np.array(evolved_fitness)
# 再次评估所有青蛙(确保一致性)
self._evaluate_fitness(X, y)
# 返回最优解
self.selected_features = np.where(self.best_frog == 1)[0]
return self.selected_features.tolist()
def get_feature_importance(self):
"""获取特征选择结果的统计信息"""
if self.selected_features is None:
raise ValueError("请先运行 fit 方法")
n_selected = len(self.selected_features)
selection_ratio = n_selected / self.n_features
return {
'selected_features': self.selected_features,
'n_selected': n_selected,
'n_total': self.n_features,
'selection_ratio': selection_ratio,
'best_fitness': self.best_fitness
}
def shuffled_frog_leaping_selection(X, y, n_frogs=50, n_memeplexes=5,
n_evolution_steps=10, n_shuffle_iterations=10,
classifier=None, cv=5):
"""
使用随机蛙跳算法进行特征选择
参数:
X: 特征矩阵 (n_samples, n_features)
y: 标签向量 (n_samples,)
n_frogs: 青蛙种群大小
n_memeplexes: 小组数量
n_evolution_steps: 每个小组的进化步数
n_shuffle_iterations: 重组迭代次数
classifier: 用于评估特征子集的分类器
cv: 交叉验证折数
返回:
selected_features: 选择的特征索引列表
"""
sfla = ShuffledFrogLeaping(
n_frogs=n_frogs,
n_memeplexes=n_memeplexes,
n_evolution_steps=n_evolution_steps,
n_shuffle_iterations=n_shuffle_iterations,
classifier=classifier,
cv=cv
)
return sfla.fit(X, y)
# 使用示例
if __name__ == "__main__":
# 生成示例数据
from sklearn.datasets import make_classification
X, y = make_classification(
n_samples=200,
n_features=50,
n_informative=10,
n_redundant=10,
n_clusters_per_class=1,
random_state=42
)
print("原始特征数量:", X.shape[1])
# 使用随机蛙跳算法进行特征选择
selected_features = shuffled_frog_leaping_selection(
X, y,
n_frogs=30,
n_memeplexes=3,
n_evolution_steps=5,
n_shuffle_iterations=5
)
print("选择的特征数量:", len(selected_features))
print("选择的特征索引:", selected_features)
# 计算选择率
selection_ratio = len(selected_features) / X.shape[1]
print(".2f")