Files
HSI/Feature_Selection_method/ReliefF.py

89 lines
3.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import numpy as np
from sklearn.neighbors import NearestNeighbors
class ReliefF:
    """Simplified ReliefF feature scorer.

    For every sample the ``n_neighbors`` nearest samples are looked up once
    (all classes pooled together).  Neighbours sharing the sample's label
    ("hits") decrease a feature's score by the squared feature difference,
    neighbours with a different label ("misses") increase it.  The features
    with the highest accumulated scores are kept.

    Attributes:
        n_neighbors: Number of neighbours inspected per sample.
        n_features_to_keep: Number of top-scoring features to select.
        feature_scores: Per-feature scores, set by :meth:`fit`.
        top_features: Indices of the selected features, set by :meth:`fit`.
    """

    def __init__(self, n_neighbors=20, n_features_to_keep=20):
        """Store the algorithm parameters.

        :param n_neighbors: number of neighbours inspected per sample.
        :param n_features_to_keep: number of features to select.
        """
        self.n_neighbors = n_neighbors
        self.n_features_to_keep = n_features_to_keep
        self.feature_scores = None  # per-feature scores, filled by fit()
        self.top_features = None  # indices of the best features, filled by fit()

    def fit(self, X, y):
        """Score every feature of ``X`` against the labels ``y``.

        :param X: feature matrix of shape (n_samples, n_features).
        :param y: class labels, one per sample.
        :return: indices of the selected features, ordered by ascending
            score (the best feature is last).
        :raises ValueError: if ``X`` is not 2-D, is empty, does not match
            ``y`` in length, or ``n_neighbors`` is negative.
        """
        # Work in float64: unsigned integer data would otherwise wrap around
        # when two feature values are subtracted.
        X = np.asarray(X, dtype=float)
        # Positional indexing only; also flattens (n_samples, 1) label columns.
        y = np.asarray(y).ravel()

        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        m, n = X.shape  # m samples, n features
        if m == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != m:
            raise ValueError("X and y must contain the same number of samples")
        if self.n_neighbors < 0:
            raise ValueError("n_neighbors must be non-negative")

        # A sample cannot have more than m - 1 neighbours other than itself.
        k = min(self.n_neighbors, m - 1)

        scores = np.zeros(n)
        if k >= 1:
            neighbors = self._neighbor_indices(X, k)
            # One vectorised pass per neighbour rank instead of a Python loop
            # over every (sample, neighbour, feature) triple.
            for rank in range(k):
                nb = neighbors[:, rank]
                sq_diff = (X - X[nb]) ** 2
                # Hits pull the score down, misses push it up.
                sign = np.where(y[nb] == y, -1.0, 1.0)
                scores += sign @ sq_diff
            scores /= k * m

        self.feature_scores = scores

        # Clamp explicitly: slicing with ``[-0:]`` would return every feature
        # when zero features are requested.
        keep = min(max(int(self.n_features_to_keep), 0), n)
        order = np.argsort(scores)
        self.top_features = order[n - keep:]
        return self.top_features

    @staticmethod
    def _neighbor_indices(X, k):
        """Return an (n_samples, k) array of neighbour indices excluding self."""
        m = X.shape[0]
        # Ask for one extra neighbour so that the sample itself can be dropped.
        nbrs = NearestNeighbors(n_neighbors=k + 1).fit(X)
        _, indices = nbrs.kneighbors(X)

        # With duplicated points the sample is not guaranteed to be returned
        # in column 0 (or at all), so locate it explicitly.
        is_self = indices == np.arange(m)[:, None]
        # Rows where the sample is absent: drop the farthest neighbour instead.
        absent = ~is_self.any(axis=1)
        is_self[absent, -1] = True
        return indices[~is_self].reshape(m, k)

    def fit_transform(self, X, y):
        """Fit on ``X``/``y`` and return the selected feature indices.

        Note that, despite the name, this returns the indices produced by
        :meth:`fit`, not a reduced copy of ``X``.
        """
        return self.fit(X, y)
def multi_scale_relieff_stratified(X, y, segment_size=100, n_subsegments=20,
                                   n_features_per_subsegment=5, n_neighbors=10):
    """Stratified multi-scale feature selection covering every band segment.

    The feature axis is cut into consecutive segments of ``segment_size``
    columns, each segment into sub-regions of
    ``segment_size // n_subsegments`` columns (at least one), and ReliefF is
    run independently inside every sub-region.

    :param X: feature matrix of shape (n_samples, n_features).
    :param y: class labels, one per sample.
    :param segment_size: number of columns per segment.
    :param n_subsegments: number of sub-regions per full segment.
    :param n_features_per_subsegment: features selected in each sub-region.
    :param n_neighbors: neighbour count handed to each ReliefF run.
    :return: sorted, de-duplicated array of selected global feature indices.
    :raises ValueError: if ``segment_size`` or ``n_subsegments`` is below 1.
    """
    if segment_size < 1:
        raise ValueError("segment_size must be at least 1")
    if n_subsegments < 1:
        raise ValueError("n_subsegments must be at least 1")

    X = np.asarray(X)
    n_total = X.shape[1]
    # Never let the step reach 0 when a segment is narrower than the
    # requested number of sub-regions.
    subsegment_size = max(1, segment_size // n_subsegments)

    selected_features = []
    for seg_start in range(0, n_total, segment_size):
        # The last segment may be shorter than segment_size; stop at the real
        # end so that no empty sub-region is handed to ReliefF.
        seg_stop = min(seg_start + segment_size, n_total)
        for sub_start in range(seg_start, seg_stop, subsegment_size):
            sub_stop = min(sub_start + subsegment_size, seg_stop)
            relief = ReliefF(n_neighbors=n_neighbors,
                             n_features_to_keep=n_features_per_subsegment)
            local_idx = relief.fit_transform(X[:, sub_start:sub_stop], y)
            # Shift sub-region-local indices to positions in the full matrix.
            selected_features.extend(np.asarray(local_idx) + sub_start)

    # Explicit integer dtype keeps an empty result usable as an index array.
    return np.unique(np.asarray(selected_features, dtype=np.intp))