Initial commit
This commit is contained in:
8  .idea/.gitignore  generated  vendored  Normal file
@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
1  .idea/.name  generated  Normal file
@@ -0,0 +1 @@
main.py
25  .idea/inspectionProfiles/Project_Default.xml  generated  Normal file
@@ -0,0 +1,25 @@
<component name="InspectionProjectProfileManager">
  <profile version="1.0">
    <option name="myName" value="Project Default" />
    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
      <option name="ignoredPackages">
        <value>
          <list size="12">
            <item index="0" class="java.lang.String" itemvalue="spectral" />
            <item index="1" class="java.lang.String" itemvalue="scipy" />
            <item index="2" class="java.lang.String" itemvalue="shapely" />
            <item index="3" class="java.lang.String" itemvalue="PyKrige" />
            <item index="4" class="java.lang.String" itemvalue="wheel" />
            <item index="5" class="java.lang.String" itemvalue="pyproj" />
            <item index="6" class="java.lang.String" itemvalue="setuptools" />
            <item index="7" class="java.lang.String" itemvalue="tqdm" />
            <item index="8" class="java.lang.String" itemvalue="matplotlib" />
            <item index="9" class="java.lang.String" itemvalue="wandb" />
            <item index="10" class="java.lang.String" itemvalue="numpy" />
            <item index="11" class="java.lang.String" itemvalue="Pillow" />
          </list>
        </value>
      </option>
    </inspection_tool>
  </profile>
</component>
6  .idea/inspectionProfiles/profiles_settings.xml  generated  Normal file
@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
7  .idea/misc.xml  generated  Normal file
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Black">
    <option name="sdkName" value="opensa" />
  </component>
  <component name="ProjectRootManager" version="2" project-jdk-name="insect" project-jdk-type="Python SDK" />
</project>
8  .idea/modules.xml  generated  Normal file
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/plastic.iml" filepath="$PROJECT_DIR$/.idea/plastic.iml" />
    </modules>
  </component>
</project>
11  .idea/plastic.iml  generated  Normal file
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="insect" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="TestRunnerService">
    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
  </component>
</module>
BIN  __pycache__/bil2rgb.cpython-310.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/bil2rgb.cpython-312.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/bil2rgb.cpython-313.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/extact_shape.cpython-310.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/extact_shape.cpython-312.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/get_glcm.cpython-310.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/get_glcm.cpython-312.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/mask.cpython-310.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/mask.cpython-312.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/outputs2dataframe.cpython-310.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/outputs2dataframe.cpython-312.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/shape_spectral.cpython-310.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/shape_spectral.cpython-312.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/shape_spectral_background.cpython-310.pyc  Normal file  (Binary file not shown.)
BIN  __pycache__/shape_spectral_background.cpython-312.pyc  Normal file  (Binary file not shown.)
53  bil2rgb.py  Normal file
@@ -0,0 +1,53 @@
import numpy as np
from spectral.io import envi
from PIL import Image
import matplotlib.pyplot as plt
import os

def linear_stretch_2_percent(data):
    """
    Apply a 2% linear stretch to the data.

    Args:
        data: input single-band data

    Returns:
        the stretched data (0-255)
    """
    # Compute the 2% and 98% percentiles
    low = np.percentile(data, 2)
    high = np.percentile(data, 98)

    # Apply the linear stretch
    stretched = np.clip((data - low) / (high - low), 0, 1) * 255
    return stretched.astype(np.uint8)


def process_bil_files(input_folder):
    """
    Process a BIL file: read bands 10, 60 and 160 and compose them into an
    RGB image (returned as a PIL image; saving to PNG is left to the caller).

    Args:
        input_folder: path to the .bil file; the matching .hdr header
            is derived from this path
    """
    # Open the BIL file via its ENVI header
    img = envi.open(input_folder.replace('.bil', '.hdr'), input_folder)

    # Read the requested bands (10, 60, 160)
    # Note: band indices are 0-based, so band 10 is index 9, and so on
    band_10 = img.read_band(9)
    band_60 = img.read_band(59)
    band_160 = img.read_band(159)

    # Apply the 2% linear stretch to each band
    band_10_stretched = linear_stretch_2_percent(band_10)
    band_60_stretched = linear_stretch_2_percent(band_60)
    band_160_stretched = linear_stretch_2_percent(band_160)

    # Build the RGB image (channels map to bands 10, 60, 160)
    rgb_img = np.stack([band_10_stretched, band_60_stretched, band_160_stretched], axis=-1)

    # Convert the NumPy array to a PIL image, making sure the values
    # are in the 0-255 range and of type uint8
    rgb_img_pil = Image.fromarray((rgb_img).astype(np.uint8))

    return rgb_img_pil
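A minimal usage sketch for process_bil_files (not part of the commit), assuming a hypothetical data/scene.bil with its matching data/scene.hdr and at least 160 bands; since the function returns the composed PIL image, the PNG export happens at the call site:

    # Hypothetical paths; any ENVI BIL/HDR pair with at least 160 bands would do.
    rgb = process_bil_files('data/scene.bil')
    rgb.save('data/scene_rgb.png')  # write the stretched RGB composite as a PNG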
3  classification_model/.idea/.gitignore  generated  vendored  Normal file
@@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml
14  classification_model/.idea/OpenSA.iml  generated  Normal file
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/venv" />
    </content>
    <orderEntry type="jdk" jdkName="opensa" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyDocumentationSettings">
    <option name="format" value="PLAIN" />
    <option name="myDocStringFormat" value="Plain" />
  </component>
</module>
15  classification_model/.idea/deployment.xml  generated  Normal file
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PublishConfigData" autoUpload="Always" remoteFilesAllowedToDisappearOnAutoupload="false">
    <serverData>
      <paths name="jiangxianchun@10.255.231.50:22 password">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
    </serverData>
    <option name="myAutoUpload" value="ALWAYS" />
  </component>
</project>
6  classification_model/.idea/inspectionProfiles/profiles_settings.xml  generated  Normal file
@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
10  classification_model/.idea/misc.xml  generated  Normal file
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Black">
    <option name="sdkName" value="pytorch (2)" />
  </component>
  <component name="ProjectRootManager" version="2" project-jdk-name="opensa" project-jdk-type="Python SDK" />
  <component name="PyCharmProfessionalAdvertiser">
    <option name="shown" value="true" />
  </component>
</project>
8  classification_model/.idea/modules.xml  generated  Normal file
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/OpenSA.iml" filepath="$PROJECT_DIR$/.idea/OpenSA.iml" />
    </modules>
  </component>
</project>
6  classification_model/.idea/other.xml  generated  Normal file
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PySciProjectComponent">
    <option name="PY_INTERACTIVE_PLOTS_SUGGESTED" value="true" />
  </component>
</project>
13  classification_model/.idea/vcs.xml  generated  Normal file
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
    <mapping directory="$PROJECT_DIR$/LightGBM" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/LightGBM/external_libs/compute" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/LightGBM/external_libs/eigen" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/LightGBM/external_libs/fast_double_parser" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/LightGBM/external_libs/fast_double_parser/benchmarks/dependencies/abseil-cpp" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/LightGBM/external_libs/fast_double_parser/benchmarks/dependencies/double-conversion" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/LightGBM/external_libs/fmt" vcs="Git" />
  </component>
</project>
259  classification_model/Classification/CNN.py  Normal file
@@ -0,0 +1,259 @@
import torch.nn.functional as F
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from torch.utils.tensorboard import SummaryWriter
from torch.cuda.amp import GradScaler, autocast
import os
from sklearn.metrics import precision_score, recall_score, f1_score
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
writer = SummaryWriter()  # initialize TensorBoard
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
# Custom dataset with data augmentation (additive noise)
class MyDataset(Dataset):
    def __init__(self, specs, labels, augment=False):
        self.specs = specs
        self.labels = labels
        self.augment = augment  # whether data augmentation is enabled

    def __getitem__(self, index):
        spec, target = self.specs[index], self.labels[index]

        # Data augmentation: add random noise to the training samples
        if self.augment:
            noise = 0.01 * torch.randn_like(spec)
            spec = spec + noise

        return spec, target

    def __len__(self):
        return len(self.specs)


# Standardize the data
def ZspPocess(X_train, X_test, y_train, y_test, need=True):
    if need:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)  # fit_transform on the training set
        X_test = scaler.transform(X_test)  # only transform is applied to the test set

    # Convert the standardized data to tensors
    X_train = torch.tensor(X_train[:, np.newaxis, :], dtype=torch.float32)
    X_test = torch.tensor(X_test[:, np.newaxis, :], dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.long)
    y_test = torch.tensor(y_test, dtype=torch.long)
    # y_train = torch.tensor(y_train.values, dtype=torch.long)
    # y_test = torch.tensor(y_test.values, dtype=torch.long)
    # Build the training set with augmentation enabled (augment=True)
    data_train = MyDataset(X_train, y_train, augment=True)
    data_test = MyDataset(X_test, y_test, augment=False)

    return data_train, data_test


# CNN model with added Dropout layers and tunable dropout rates
class CNN3Layers(nn.Module):
    def __init__(self, nls, dropout_conv=0.3, dropout_fc=0.5):
        super(CNN3Layers, self).__init__()
        self.CONV1 = nn.Sequential(
            nn.Conv1d(1, 64, 5, 1, padding=2),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2, 2),
            nn.Dropout(dropout_conv)  # Dropout after the conv block
        )
        self.CONV2 = nn.Sequential(
            nn.Conv1d(64, 128, 5, 1, padding=2),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(2, 2),
            nn.Dropout(dropout_conv)  # Dropout after the conv block
        )
        self.CONV3 = nn.Sequential(
            nn.Conv1d(128, 256, 3, 1, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.AdaptiveMaxPool1d(1),
            nn.Dropout(dropout_conv)  # Dropout after the conv block
        )
        self.fc = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout_fc),  # Dropout in the fully connected head
            nn.Linear(128, nls)
        )

    def forward(self, x):
        x = self.CONV1(x)
        x = self.CONV2(x)
        x = self.CONV3(x)
        x = x.view(x.size(0), -1)
        out = self.fc(x)
        return out


# Training function
def CNNTrain(X_train, X_test, y_train, y_test, BATCH_SIZE, n_epochs, nls, model_path):
    data_train, data_test = ZspPocess(X_train, X_test, y_train, y_test, need=True)
    train_loader = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = torch.utils.data.DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=False)

    model = CNN3Layers(nls=nls, dropout_conv=0.3, dropout_fc=0.5).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
    criterion = nn.CrossEntropyLoss().to(device)
    scaler = GradScaler()

    best_acc = 0.0
    model_save_path = model_path

    for epoch in range(n_epochs):
        model.train()
        train_acc, train_loss = [], []

        for i, data in enumerate(train_loader):
            inputs, labels = data
            inputs = inputs.to(device).float()
            labels = labels.to(device).long()

            optimizer.zero_grad()

            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            _, predicted = torch.max(outputs.data, 1)
            acc = accuracy_score(labels.cpu(), predicted.cpu())
            train_acc.append(acc)
            train_loss.append(loss.item())

        avg_train_loss = np.mean(train_loss)
        avg_train_acc = np.mean(train_acc)

        writer.add_scalar('Loss/train', avg_train_loss, epoch)
        writer.add_scalar('Accuracy/train', avg_train_acc, epoch)

        # Test-set evaluation
        model.eval()
        test_acc, test_loss, test_precision, test_recall, test_f1 = [], [], [], [], []
        y_true, y_pred = [], []
        with torch.no_grad():
            for data in test_loader:
                inputs, labels = data
                inputs = inputs.to(device).float()
                labels = labels.to(device).long()

                with autocast():
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                _, predicted = torch.max(outputs.data, 1)
                acc = accuracy_score(labels.cpu(), predicted.cpu())
                precision = precision_score(labels.cpu(), predicted.cpu(), average='weighted', zero_division=1)
                recall = recall_score(labels.cpu(), predicted.cpu(), average='weighted', zero_division=1)
                f1 = f1_score(labels.cpu(), predicted.cpu(), average='weighted', zero_division=1)

                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predicted.cpu().numpy())

                test_acc.append(acc)
                test_loss.append(loss.item())
                test_precision.append(precision)
                test_recall.append(recall)
                test_f1.append(f1)

        avg_test_loss = np.mean(test_loss)
        avg_test_acc = np.mean(test_acc)
        avg_test_precision = np.mean(test_precision)
        avg_test_recall = np.mean(test_recall)
        avg_test_f1 = np.mean(test_f1)

        writer.add_scalar('Loss/test', avg_test_loss, epoch)
        writer.add_scalar('Accuracy/test', avg_test_acc, epoch)
        writer.add_scalar('Precision/test', avg_test_precision, epoch)
        writer.add_scalar('Recall/test', avg_test_recall, epoch)
        writer.add_scalar('F1_Score/test', avg_test_f1, epoch)

        # Print the train and test results for each epoch
        print(f"Epoch [{epoch + 1}/{n_epochs}]")
        print(f"Train Loss: {avg_train_loss:.4f}, Train Accuracy: {avg_train_acc:.4f}")
        print(f"Test Loss: {avg_test_loss:.4f}, Test Accuracy: {avg_test_acc:.4f}")
        print(f"Test Precision: {avg_test_precision:.4f}, Test Recall: {avg_test_recall:.4f}, Test F1: {avg_test_f1:.4f}")

        if avg_test_acc > best_acc:
            best_acc = avg_test_acc
            torch.save(model.state_dict(), model_save_path)

        scheduler.step(avg_test_loss)

    return {
        "accuracy": avg_test_acc,
        "precision": avg_test_precision,
        "recall": avg_test_recall,
        "f1_score": avg_test_f1,
        "confusion_matrix": confusion_matrix(y_true, y_pred)
    }

# Test function
def CNNtest(X_test, y_test, BATCH_SIZE, nls, model_path):
    # Standardize the test data and build the DataLoader
    scaler = StandardScaler()
    X_test = scaler.fit_transform(X_test)  # NOTE: the scaler is fitted on the test data itself, not reused from training
    X_test = torch.tensor(X_test[:, np.newaxis, :], dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.long)

    # Build the test dataset and DataLoader
    data_test = MyDataset(X_test, y_test, augment=False)
    test_loader = torch.utils.data.DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=False)

    # Load the model architecture and weights
    model = CNN3Layers(nls=nls).to(device)
    model.load_state_dict(torch.load(model_path))

    # Containers for the evaluation labels
    y_true, y_pred = [], []

    # Test loop
    model.eval()
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device).float(), labels.to(device).long()
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            # Collect ground-truth and predicted labels
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    # Compute the evaluation metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)

    # Return the evaluation results
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
        "confusion_matrix": cm
    }

def CNN(X_train, X_test, y_train, y_test, BATCH_SIZE, n_epochs, nls, model_path):
    # Train the model
    train_metrics = CNNTrain(X_train, X_test, y_train, y_test, BATCH_SIZE, n_epochs, nls, model_path)

    # Test the model and collect the evaluation metrics
    test_metrics = CNNtest(X_test, y_test, BATCH_SIZE, nls, model_path)

    return train_metrics, test_metrics
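A smoke-test sketch for the CNN entry point above (not part of the commit), using synthetic spectra; the sample count, band count, class count and model path are placeholders, not values from the commit:

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 462)).astype(np.float32)   # 200 synthetic spectra
    y = rng.integers(0, 4, size=200)                     # 4 placeholder classes

    train_metrics, test_metrics = CNN(
        X[:160], X[160:], y[:160], y[160:],
        BATCH_SIZE=32, n_epochs=2, nls=4, model_path="cnn_best.pt",
    )
    print(test_metrics["accuracy"], test_metrics["confusion_matrix"].shape)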
0  classification_model/Classification/CNN_GRU.py  Normal file  (empty file)
317  classification_model/Classification/CNN_HYper.py  Normal file
@@ -0,0 +1,317 @@
import torch.nn.functional as F
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from torch.utils.tensorboard import SummaryWriter
from torch.cuda.amp import GradScaler, autocast
import os
from sklearn.metrics import precision_score, recall_score, f1_score
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
writer = SummaryWriter()  # initialize TensorBoard
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
# Custom dataset with data augmentation (additive noise)
from skopt import BayesSearchCV
from skopt.space import Real, Integer
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
class MyDataset(Dataset):
    def __init__(self, specs, labels, augment=False):
        self.specs = specs
        self.labels = labels
        self.augment = augment  # whether data augmentation is enabled

    def __getitem__(self, index):
        spec, target = self.specs[index], self.labels[index]

        # Data augmentation: add random noise to the training samples
        if self.augment:
            noise = 0.01 * torch.randn_like(spec)
            spec = spec + noise

        return spec, target

    def __len__(self):
        return len(self.specs)


# Standardize the data
def ZspPocess(X_train, X_test, y_train, y_test, need=True):
    if need:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)  # fit_transform on the training set
        X_test = scaler.transform(X_test)  # only transform is applied to the test set

    # Convert the standardized data to tensors
    X_train = torch.tensor(X_train[:, np.newaxis, :], dtype=torch.float32)
    X_test = torch.tensor(X_test[:, np.newaxis, :], dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.long)
    y_test = torch.tensor(y_test, dtype=torch.long)

    # Build the training set with augmentation enabled (augment=True)
    data_train = MyDataset(X_train, y_train, augment=True)
    data_test = MyDataset(X_test, y_test, augment=False)

    return data_train, data_test


# CNN model with added Dropout layers and tunable dropout rates
class CNN3Layers(nn.Module):
    def __init__(self, nls, dropout_conv=0.3, dropout_fc=0.5):
        super(CNN3Layers, self).__init__()
        self.CONV1 = nn.Sequential(
            nn.Conv1d(1, 64, 5, 1, padding=2),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2, 2),
            nn.Dropout(dropout_conv)  # Dropout after the conv block
        )
        self.CONV2 = nn.Sequential(
            nn.Conv1d(64, 128, 5, 1, padding=2),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(2, 2),
            nn.Dropout(dropout_conv)  # Dropout after the conv block
        )
        self.CONV3 = nn.Sequential(
            nn.Conv1d(128, 256, 3, 1, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.AdaptiveMaxPool1d(1),
            nn.Dropout(dropout_conv)  # Dropout after the conv block
        )
        self.fc = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout_fc),  # Dropout in the fully connected head
            nn.Linear(128, nls)
        )

    def forward(self, x):
        x = self.CONV1(x)
        x = self.CONV2(x)
        x = self.CONV3(x)
        x = x.view(x.size(0), -1)
        out = self.fc(x)
        return out


# Training function
def CNNTrain(X_train, X_test, y_train, y_test, BATCH_SIZE, n_epochs, nls, model_path):
    data_train, data_test = ZspPocess(X_train, X_test, y_train, y_test, need=True)
    train_loader = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = torch.utils.data.DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=False)

    model = CNN3Layers(nls=nls, dropout_conv=0.3, dropout_fc=0.5).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
    criterion = nn.CrossEntropyLoss().to(device)
    scaler = GradScaler()

    best_acc = 0.0
    model_save_path = model_path

    for epoch in range(n_epochs):
        model.train()
        train_acc, train_loss = [], []

        for i, data in enumerate(train_loader):
            inputs, labels = data
            inputs = inputs.to(device).float()
            labels = labels.to(device).long()

            optimizer.zero_grad()

            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            _, predicted = torch.max(outputs.data, 1)
            acc = accuracy_score(labels.cpu(), predicted.cpu())
            train_acc.append(acc)
            train_loss.append(loss.item())

        avg_train_loss = np.mean(train_loss)
        avg_train_acc = np.mean(train_acc)

        writer.add_scalar('Loss/train', avg_train_loss, epoch)
        writer.add_scalar('Accuracy/train', avg_train_acc, epoch)

        # Test-set evaluation
        model.eval()
        test_acc, test_loss, test_precision, test_recall, test_f1 = [], [], [], [], []
        y_true, y_pred = [], []
        with torch.no_grad():
            for data in test_loader:
                inputs, labels = data
                inputs = inputs.to(device).float()
                labels = labels.to(device).long()

                with autocast():
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                _, predicted = torch.max(outputs.data, 1)
                acc = accuracy_score(labels.cpu(), predicted.cpu())
                precision = precision_score(labels.cpu(), predicted.cpu(), average='weighted', zero_division=1)
                recall = recall_score(labels.cpu(), predicted.cpu(), average='weighted', zero_division=1)
                f1 = f1_score(labels.cpu(), predicted.cpu(), average='weighted', zero_division=1)

                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predicted.cpu().numpy())

                test_acc.append(acc)
                test_loss.append(loss.item())
                test_precision.append(precision)
                test_recall.append(recall)
                test_f1.append(f1)

        avg_test_loss = np.mean(test_loss)
        avg_test_acc = np.mean(test_acc)
        avg_test_precision = np.mean(test_precision)
        avg_test_recall = np.mean(test_recall)
        avg_test_f1 = np.mean(test_f1)

        writer.add_scalar('Loss/test', avg_test_loss, epoch)
        writer.add_scalar('Accuracy/test', avg_test_acc, epoch)
        writer.add_scalar('Precision/test', avg_test_precision, epoch)
        writer.add_scalar('Recall/test', avg_test_recall, epoch)
        writer.add_scalar('F1_Score/test', avg_test_f1, epoch)

        # Print the train and test results for each epoch
        print(f"Epoch [{epoch + 1}/{n_epochs}]")
        print(f"Train Loss: {avg_train_loss:.4f}, Train Accuracy: {avg_train_acc:.4f}")
        print(f"Test Loss: {avg_test_loss:.4f}, Test Accuracy: {avg_test_acc:.4f}")
        print(f"Test Precision: {avg_test_precision:.4f}, Test Recall: {avg_test_recall:.4f}, Test F1: {avg_test_f1:.4f}")

        if avg_test_acc > best_acc:
            best_acc = avg_test_acc
            torch.save(model.state_dict(), model_save_path)

        scheduler.step(avg_test_loss)

    return {
        "accuracy": avg_test_acc,
        "precision": avg_test_precision,
        "recall": avg_test_recall,
        "f1_score": avg_test_f1,
        "confusion_matrix": confusion_matrix(y_true, y_pred)
    }

# Test function
def CNNtest(X_test, y_test, BATCH_SIZE, nls, model_path):
    # Standardize the test data and build the DataLoader
    scaler = StandardScaler()
    X_test = scaler.fit_transform(X_test)  # NOTE: the scaler is fitted on the test data itself, not reused from training
    X_test = torch.tensor(X_test[:, np.newaxis, :], dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.long)

    # Build the test dataset and DataLoader
    data_test = MyDataset(X_test, y_test, augment=False)
    test_loader = torch.utils.data.DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=False)

    # Load the model architecture and weights
    model = CNN3Layers(nls=nls).to(device)
    model.load_state_dict(torch.load(model_path))

    # Containers for the evaluation labels
    y_true, y_pred = [], []

    # Test loop
    model.eval()
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device).float(), labels.to(device).long()
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            # Collect ground-truth and predicted labels
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    # Compute the evaluation metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)

    # Return the evaluation results
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
        "confusion_matrix": cm
    }


def optimize_CNN(X_train, X_test, y_train, y_test, model_path):
    # Search space for the Bayesian optimization
    param_space = {
        'batch_size': Integer(16, 128),  # batch-size range
        'n_epochs': Integer(10, 100),  # training-epochs range
        'dropout_conv': Real(0.1, 0.5, 'uniform'),  # conv-layer dropout rate
        'dropout_fc': Real(0.1, 0.5, 'uniform'),  # fully connected dropout rate
        'lr': Real(1e-5, 1e-2, 'log-uniform'),  # learning-rate range
    }

    # Objective function that trains the model
    def objective(params):
        batch_size, n_epochs, dropout_conv, dropout_fc, lr = params

        # Train with the given hyperparameters
        train_metrics = CNNTrain(
            X_train, X_test, y_train, y_test,
            BATCH_SIZE=batch_size, n_epochs=n_epochs,
            nls=21, model_path=model_path,
        )

        # Test the model and return the evaluation metrics
        test_metrics = CNNtest(X_test, y_test, batch_size, nls=21, model_path=model_path)

        # Test-set accuracy is the optimization target
        return -test_metrics["accuracy"]  # Bayesian optimization minimizes the objective, so return the negative

    # Hyperparameter tuning via Bayesian optimization
    # NOTE: BayesSearchCV expects a scikit-learn estimator; with estimator=None
    # (and `objective` never actually passed in) this call will not run as written.
    optimizer = BayesSearchCV(
        estimator=None,  # no concrete estimator is supplied here
        search_spaces=param_space,  # search space
        n_iter=20,  # number of tuning iterations
        n_jobs=-1,  # use all available CPU cores
        verbose=1,  # log the optimization progress
        random_state=42,  # fixed random seed
    )

    # Run the hyperparameter search
    optimizer.fit(X_train, y_train)

    # Print the best hyperparameters
    best_params = optimizer.best_params_
    print("Best hyperparameters:", best_params)

    # Retrain with the best hyperparameters and return the metrics
    batch_size = best_params['batch_size']
    n_epochs = best_params['n_epochs']
    dropout_conv = best_params['dropout_conv']
    dropout_fc = best_params['dropout_fc']
    lr = best_params['lr']

    train_metrics = CNNTrain(
        X_train, X_test, y_train, y_test,
        BATCH_SIZE=batch_size, n_epochs=n_epochs,
        nls=21, model_path=model_path,
    )

    test_metrics = CNNtest(X_test, y_test, batch_size, nls=21, model_path=model_path)

    # Return the train and test evaluation results
    return best_params, train_metrics, test_metrics
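Since BayesSearchCV requires a scikit-learn estimator and the `objective` closure above is never wired into the search, the same optimization can be expressed with skopt's gp_minimize, which does accept a plain objective function. A sketch (not part of the commit) under the same search ranges, assuming X_train, X_test, y_train, y_test and model_path are in scope:

    from skopt import gp_minimize
    from skopt.space import Real, Integer

    dimensions = [
        Integer(16, 128, name='batch_size'),
        Integer(10, 100, name='n_epochs'),
        Real(0.1, 0.5, name='dropout_conv'),
        Real(0.1, 0.5, name='dropout_fc'),
        Real(1e-5, 1e-2, prior='log-uniform', name='lr'),
    ]

    def gp_objective(params):
        batch_size, n_epochs, dropout_conv, dropout_fc, lr = params
        # Note: CNNTrain above hardcodes its dropout rates and learning rate,
        # so only batch_size and n_epochs actually take effect here.
        CNNTrain(X_train, X_test, y_train, y_test,
                 BATCH_SIZE=int(batch_size), n_epochs=int(n_epochs),
                 nls=21, model_path=model_path)
        test_metrics = CNNtest(X_test, y_test, int(batch_size), nls=21, model_path=model_path)
        return -test_metrics["accuracy"]  # gp_minimize minimizes, so negate accuracy

    result = gp_minimize(gp_objective, dimensions, n_calls=20, random_state=42)
    print("Best accuracy:", -result.fun, "with params:", result.x)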
330  classification_model/Classification/CNN_SAE.py  Normal file
@@ -0,0 +1,330 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
import numpy as np
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

# Custom dataset
class MyDataset(Dataset):
    def __init__(self, specs, labels, augment=False):
        self.specs = specs
        self.labels = labels
        self.augment = augment

    def __getitem__(self, index):
        spec, target = self.specs[index], self.labels[index]
        if self.augment:
            noise = 0.01 * torch.randn_like(spec)
            spec = spec + noise
        return spec, target

    def __len__(self):
        return len(self.specs)


# Data standardization
def ZspProcess(X_train, X_test, y_train, y_test, need=True):
    if need:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    X_train = torch.tensor(X_train[:, np.newaxis, :], dtype=torch.float32)
    X_test = torch.tensor(X_test[:, np.newaxis, :], dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.long)
    y_test = torch.tensor(y_test, dtype=torch.long)

    data_train = MyDataset(X_train, y_train, augment=True)
    data_test = MyDataset(X_test, y_test, augment=False)
    return data_train, data_test


# Focal Loss
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        probs = torch.softmax(inputs, dim=1)
        target_probs = probs[range(len(targets)), targets]
        focal_weight = self.alpha * (1 - target_probs) ** self.gamma
        log_prob = -torch.log(target_probs)
        loss = focal_weight * log_prob

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:
            return loss


# Positional encoding module
class PositionalEncoding(nn.Module):
    def __init__(self, embed_dim, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, embed_dim)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, embed_dim, 2).float() * (-torch.log(torch.tensor(10000.0)) / embed_dim))
        pe[:, 0::2] = torch.sin(position * div_term)  # even dimensions
        pe[:, 1::2] = torch.cos(position * div_term)  # odd dimensions
        pe = pe.unsqueeze(0).transpose(0, 1)  # (max_len, 1, embed_dim)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:x.size(0), :]


# Transformer block
class TransformerBlockWithSAE(nn.Module):
    def __init__(self, embed_dim, ff_dim, dropout=0.1, max_len=5000):
        super(TransformerBlockWithSAE, self).__init__()
        self.query = nn.Linear(embed_dim, embed_dim)
        self.key = nn.Linear(embed_dim, embed_dim)
        self.value = nn.Linear(embed_dim, embed_dim)
        self.scale = embed_dim ** 0.5
        self.positional_encoding = PositionalEncoding(embed_dim, max_len)

        self.feed_forward = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embed_dim)
        )
        self.layernorm1 = nn.LayerNorm(embed_dim)
        self.layernorm2 = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.positional_encoding(x)
        q = self.query(x)
        k = self.key(x)
        v = self.value(x)

        attn_weights = torch.matmul(q, k.transpose(-2, -1)) / self.scale
        attn_weights = torch.softmax(attn_weights, dim=-1)
        attn_output = torch.matmul(attn_weights, v)

        x = self.layernorm1(x + self.dropout(attn_output))
        ff_output = self.feed_forward(x)
        x = self.layernorm2(x + self.dropout(ff_output))
        return x


# Modified CNN+Transformer model
class CNNWithSAE(nn.Module):
    def __init__(self, nls, embed_dim=96, ff_dim=192, dropout=0.1, max_len=5000):
        super(CNNWithSAE, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=5, stride=2, padding=2),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.MaxPool1d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(64, embed_dim, kernel_size=5, stride=2, padding=2),
            nn.BatchNorm1d(embed_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.MaxPool1d(2, 2)
        )
        self.transformer = TransformerBlockWithSAE(embed_dim, ff_dim, dropout, max_len)
        self.fc = nn.Sequential(
            nn.Linear(embed_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, nls)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.permute(2, 0, 1)
        x = self.transformer(x)
        x = x.mean(dim=0)
        x = self.fc(x)
        return x


# Modified CNN+Transformer model
# NOTE: this second definition shadows the CNNWithSAE class defined just above.
class CNNWithSAE(nn.Module):
    def __init__(self, nls, embed_dim=96, ff_dim=192, dropout=0.1):
        super(CNNWithSAE, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=5, stride=2, padding=2),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.MaxPool1d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(64, embed_dim, kernel_size=5, stride=2, padding=2),
            nn.BatchNorm1d(embed_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.MaxPool1d(2, 2)
        )
        self.transformer = TransformerBlockWithSAE(embed_dim, ff_dim, dropout)
        self.fc = nn.Sequential(
            nn.Linear(embed_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, nls)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.permute(2, 0, 1)  # reshape to the Transformer input layout (seq_len, batch, embed_dim)
        x = self.transformer(x)
        x = x.mean(dim=0)  # mean pooling over the sequence
        x = self.fc(x)
        return x


# Training function (with early stopping)
def TransformerTrain(X_train, X_val, y_train, y_val, BATCH_SIZE, n_epochs, nls, model_path, patience=10):
    data_train, data_val = ZspProcess(X_train, X_val, y_train, y_val, need=True)
    train_loader = DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(data_val, batch_size=BATCH_SIZE, shuffle=False)

    model = CNNWithSAE(nls=nls).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)
    criterion = FocalLoss(alpha=1, gamma=2).to(device)
    scaler = GradScaler()

    best_val_loss = float('inf')
    early_stop_counter = 0
    y_true_train, y_pred_train = [], []

    for epoch in range(n_epochs):
        model.train()
        train_loss, train_acc = [], []

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            _, preds = torch.max(outputs, 1)
            y_true_train.extend(labels.cpu().numpy())
            y_pred_train.extend(preds.cpu().numpy())
            acc = accuracy_score(labels.cpu(), preds.cpu())
            train_loss.append(loss.item())
            train_acc.append(acc)

        # Validation evaluation
        model.eval()
        val_loss = []
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss.append(loss.item())

        avg_val_loss = np.mean(val_loss)
        avg_train_loss = np.mean(train_loss)
        avg_train_acc = np.mean(train_acc)

        print(f"Epoch [{epoch+1}/{n_epochs}] - Train Loss: {avg_train_loss:.4f}, Train Acc: {avg_train_acc:.4f}, Val Loss: {avg_val_loss:.4f}")

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            early_stop_counter = 0
            torch.save(model.state_dict(), model_path)
            print("Model improved and saved.")
        else:
            early_stop_counter += 1
            print(f"No improvement. Early stop counter: {early_stop_counter}/{patience}")

        if early_stop_counter >= patience:
            print("Early stopping triggered.")
            break

    # Training-set metrics
    train_accuracy = accuracy_score(y_true_train, y_pred_train)
    train_precision = precision_score(y_true_train, y_pred_train, average='weighted')
    train_recall = recall_score(y_true_train, y_pred_train, average='weighted')
    train_f1 = f1_score(y_true_train, y_pred_train, average='weighted')
    train_cm = confusion_matrix(y_true_train, y_pred_train)

    train_metrics = {
        "accuracy": train_accuracy,
        "precision": train_precision,
        "recall": train_recall,
        "f1_score": train_f1,
        "confusion_matrix": train_cm
    }

    return model, train_metrics


# Test function
def TransformerTest(X_test, y_test, BATCH_SIZE, nls, model_path):
    data_test = ZspProcess(X_test, X_test, y_test, y_test, need=True)[1]
    test_loader = DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=False)

    model = CNNWithSAE(nls=nls).to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()

    y_true, y_pred = [], []
    test_loss = []

    criterion = FocalLoss(alpha=1, gamma=2).to(device)  # use FocalLoss

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            _, preds = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            test_loss.append(loss.item())

    # Test-set metrics
    test_accuracy = accuracy_score(y_true, y_pred)
    test_precision = precision_score(y_true, y_pred, average='weighted')
    test_recall = recall_score(y_true, y_pred, average='weighted')
    test_f1 = f1_score(y_true, y_pred, average='weighted')
    test_cm = confusion_matrix(y_true, y_pred)

    test_metrics = {
        "accuracy": test_accuracy,
        "precision": test_precision,
        "recall": test_recall,
        "f1_score": test_f1,
        "confusion_matrix": test_cm
    }

    print(f"Accuracy: {test_accuracy:.4f}, Precision: {test_precision:.4f}, Recall: {test_recall:.4f}, F1 Score: {test_f1:.4f}")
    print(f"Confusion Matrix:\n{test_cm}")
    return test_metrics


def SAETrainAndTest(X, X_test, y, y_test, BATCH_SIZE, n_epochs, nls, model_path, val_split=0.2, patience=10):
    # Split a validation set off the training data
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=val_split, random_state=42)

    # Train the model and collect the training metrics
    model, train_metrics = TransformerTrain(X_train, X_val, y_train, y_val, BATCH_SIZE, n_epochs, nls, model_path, patience)

    # Test the model and collect the test metrics
    test_metrics = TransformerTest(X_test, y_test, BATCH_SIZE, nls, model_path)

    return train_metrics, test_metrics
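A quick sanity check (not part of the commit) for the FocalLoss defined above: with alpha=1 and gamma=0 the focal weight collapses to 1, so the loss should match plain cross-entropy:

    import torch
    import torch.nn as nn

    logits = torch.randn(8, 5)                   # 8 samples, 5 classes
    targets = torch.randint(0, 5, (8,))
    focal = FocalLoss(alpha=1, gamma=0)(logits, targets)
    ce = nn.CrossEntropyLoss()(logits, targets)
    print(torch.allclose(focal, ce, atol=1e-6))  # expected: True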
268  classification_model/Classification/CNN_Transfomer.py  Normal file
@@ -0,0 +1,268 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
import numpy as np
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

# Custom dataset
class MyDataset(Dataset):
    def __init__(self, specs, labels, augment=False):
        self.specs = specs
        self.labels = labels
        self.augment = augment

    def __getitem__(self, index):
        spec, target = self.specs[index], self.labels[index]
        if self.augment:
            noise = 0.01 * torch.randn_like(spec)
            spec = spec + noise
        return spec, target

    def __len__(self):
        return len(self.specs)


# Data standardization
def ZspProcess(X_train, X_test, y_train, y_test, need=True):
    if need:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    X_train = torch.tensor(X_train[:, np.newaxis, :], dtype=torch.float32)
    X_test = torch.tensor(X_test[:, np.newaxis, :], dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.long)
    y_test = torch.tensor(y_test, dtype=torch.long)

    data_train = MyDataset(X_train, y_train, augment=True)
    data_test = MyDataset(X_test, y_test, augment=False)
    return data_train, data_test


# Focal Loss
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        probs = torch.softmax(inputs, dim=1)
        target_probs = probs[range(len(targets)), targets]
        focal_weight = self.alpha * (1 - target_probs) ** self.gamma
        log_prob = -torch.log(target_probs)
        loss = focal_weight * log_prob

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:
            return loss


# Transformer block
class TransformerBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
        super(TransformerBlock, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embed_dim)
        )
        self.layernorm1 = nn.LayerNorm(embed_dim)
        self.layernorm2 = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        attn_output, _ = self.attention(x, x, x)
        x = self.layernorm1(x + self.dropout(attn_output))
        ff_output = self.feed_forward(x)
        x = self.layernorm2(x + self.dropout(ff_output))
        return x


# Improved CNN+Transformer model
class CNNWithTransformer(nn.Module):
    def __init__(self, nls, embed_dim=96, num_heads=2, ff_dim=192, dropout=0.1):
        super(CNNWithTransformer, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=5, stride=2, padding=2),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2),  # added Dropout
            nn.MaxPool1d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(64, embed_dim, kernel_size=5, stride=2, padding=2),
            nn.BatchNorm1d(embed_dim),
            nn.ReLU(),
            nn.Dropout(0.2),  # added Dropout
            nn.MaxPool1d(2, 2)
        )
        self.transformer = TransformerBlock(embed_dim, num_heads, ff_dim, dropout)
        self.fc = nn.Sequential(
            nn.Linear(embed_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),  # added Dropout
            nn.Linear(128, nls)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.permute(2, 0, 1)
        x = self.transformer(x)
        x = x.mean(dim=0)
        x = self.fc(x)
        return x


# Training function (with early stopping)
def TransformerTrain(X_train, X_val, y_train, y_val, BATCH_SIZE, n_epochs, nls, model_path, patience=10):
    data_train, data_val = ZspProcess(X_train, X_val, y_train, y_val, need=True)
    train_loader = DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(data_val, batch_size=BATCH_SIZE, shuffle=False)

    model = CNNWithTransformer(nls=nls).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)
    criterion = FocalLoss(alpha=1, gamma=2).to(device)
    scaler = GradScaler()

    best_val_loss = float('inf')
    early_stop_counter = 0
    y_true_train, y_pred_train = [], []

    for epoch in range(n_epochs):
        model.train()
        train_loss, train_acc = [], []

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            _, preds = torch.max(outputs, 1)
            y_true_train.extend(labels.cpu().numpy())
            y_pred_train.extend(preds.cpu().numpy())
            acc = accuracy_score(labels.cpu(), preds.cpu())
            train_loss.append(loss.item())
            train_acc.append(acc)

        # Validation evaluation
        model.eval()
        val_loss = []
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss.append(loss.item())

        avg_val_loss = np.mean(val_loss)
        avg_train_loss = np.mean(train_loss)
        avg_train_acc = np.mean(train_acc)

        print(f"Epoch [{epoch+1}/{n_epochs}] - Train Loss: {avg_train_loss:.4f}, Train Acc: {avg_train_acc:.4f}, Val Loss: {avg_val_loss:.4f}")

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            early_stop_counter = 0
            torch.save(model.state_dict(), model_path)
            print("Model improved and saved.")
        else:
            early_stop_counter += 1
            print(f"No improvement. Early stop counter: {early_stop_counter}/{patience}")

        if early_stop_counter >= patience:
            print("Early stopping triggered.")
            break

    # Training-set metrics
    train_accuracy = accuracy_score(y_true_train, y_pred_train)
    train_precision = precision_score(y_true_train, y_pred_train, average='weighted')
    train_recall = recall_score(y_true_train, y_pred_train, average='weighted')
    train_f1 = f1_score(y_true_train, y_pred_train, average='weighted')
    train_cm = confusion_matrix(y_true_train, y_pred_train)

    train_metrics = {
        "accuracy": train_accuracy,
        "precision": train_precision,
        "recall": train_recall,
        "f1_score": train_f1,
        "confusion_matrix": train_cm
    }

    return model, train_metrics


# Test function
def TransformerTest(X_test, y_test, BATCH_SIZE, nls, model_path):
    data_test = ZspProcess(X_test, X_test, y_test, y_test, need=True)[1]
    test_loader = DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=False)

    model = CNNWithTransformer(nls=nls).to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()

    y_true, y_pred = [], []
    test_loss = []

    criterion = FocalLoss(alpha=1, gamma=2).to(device)  # use FocalLoss

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            _, preds = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            test_loss.append(loss.item())

    # Test-set metrics
    test_accuracy = accuracy_score(y_true, y_pred)
    test_precision = precision_score(y_true, y_pred, average='weighted')
    test_recall = recall_score(y_true, y_pred, average='weighted')
    test_f1 = f1_score(y_true, y_pred, average='weighted')
    test_cm = confusion_matrix(y_true, y_pred)

    test_metrics = {
        "accuracy": test_accuracy,
        "precision": test_precision,
        "recall": test_recall,
        "f1_score": test_f1,
        "confusion_matrix": test_cm
    }

    print(f"Accuracy: {test_accuracy:.4f}, Precision: {test_precision:.4f}, Recall: {test_recall:.4f}, F1 Score: {test_f1:.4f}")
    print(f"Confusion Matrix:\n{test_cm}")
    return test_metrics


def TransformerTrainAndTest(X, X_test, y, y_test, BATCH_SIZE, n_epochs, nls, model_path, val_split=0.2, patience=10):
    # Split a validation set off the training data
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=val_split, random_state=42)

    # Train the model and collect the training metrics
    model, train_metrics = TransformerTrain(X_train, X_val, y_train, y_val, BATCH_SIZE, n_epochs, nls, model_path, patience)

    # Test the model and collect the test metrics
    test_metrics = TransformerTest(X_test, y_test, BATCH_SIZE, nls, model_path)

    return train_metrics, test_metrics
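A shape walk-through (not part of the commit) for CNNWithTransformer, assuming the 462-band spectra used elsewhere in this commit: each stride-2 convolution followed by a stride-2 max-pool roughly quarters the length, so 462 samples become a sequence of length 29 with embed_dim=96, which the attention block consumes as (seq_len, batch, embed_dim):

    import torch

    model = CNNWithTransformer(nls=21)
    x = torch.randn(2, 1, 462)   # (batch, channels, bands)
    out = model(x)
    print(out.shape)             # expected: torch.Size([2, 21])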
190
classification_model/Classification/CNN_deepseek.py
Normal file
190
classification_model/Classification/CNN_deepseek.py
Normal file
@ -0,0 +1,190 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import pandas as pd

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset with on-the-fly data augmentation
class SpectralDataset(Dataset):
    def __init__(self, X, y, augment=False, input_length=462):
        # Convert a DataFrame to a numpy array
        if isinstance(X, pd.DataFrame):
            X = X.values
        # Make sure y is a numpy array as well
        if isinstance(y, (pd.Series, pd.DataFrame)):
            y = y.values

        # X must be (N, L); expand to (N, 1, L)
        assert len(X.shape) == 2, f"Expected X to be 2D, got {X.shape}"
        self.X = torch.tensor(X[:, np.newaxis, :], dtype=torch.float32)  # (N, 1, L)
        self.y = torch.tensor(y, dtype=torch.long)  # y should be 1-D
        self.augment = augment
        self.input_length = input_length

    def __getitem__(self, index):
        x = self.X[index]  # shape: (1, L)
        y = self.y[index]

        if self.augment:
            x = x.clone()  # clone so in-place augmentations do not mutate the cached tensor

            # Additive noise
            if torch.rand(1) < 0.7:
                noise_level = torch.rand(1) * 0.05
                x += noise_level * torch.randn_like(x)

            # Spectral shift
            if torch.rand(1) < 0.5:
                shift = torch.randint(-5, 5, (1,)).item()
                x = torch.roll(x, shifts=shift, dims=-1)

            # Local occlusion: zero out a 10-point window
            if torch.rand(1) < 0.3:
                start = torch.randint(0, self.input_length - 10, (1,)).item()
                x[0, start:start + 10] = 0.0

        return x, y

    def __len__(self):
        return len(self.X)


# Spectral attention module (squeeze-and-excitation style channel attention)
class SpectralAttention(nn.Module):
    def __init__(self, channel, reduction=8):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction),
            nn.GELU(),
            nn.Linear(channel // reduction, channel),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, l = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1)
        return x * y.expand_as(x)


# CNN model
class AgroSpecCNN(nn.Module):
    def __init__(self, input_length=462, num_classes=21):
        super().__init__()
        self.input_length = input_length
        self.features = nn.Sequential(
            nn.Conv1d(1, 64, 5, padding=2),  # larger kernel
            nn.BatchNorm1d(64),
            nn.GELU(),
            SpectralAttention(64),
            nn.MaxPool1d(2),  # pooling layer

            nn.Conv1d(64, 128, 5, padding=2),
            nn.BatchNorm1d(128),
            nn.GELU(),
            SpectralAttention(128),
            nn.AdaptiveAvgPool1d(self.input_length // 2),  # adaptive pooling keyed to the input length

            nn.Conv1d(128, 256, 5, padding=2),
            nn.BatchNorm1d(256),
            nn.GELU(),
            nn.AdaptiveAvgPool1d(1)  # final pooling down to length 1
        )

        self.classifier = nn.Sequential(
            nn.Linear(256, 128),
            nn.GELU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)  # flatten
        return self.classifier(x)


# Training loop
def CNNTrain(X_train, y_train, BATCH_SIZE, n_epochs, input_length, num_classes, model_path):
    train_set = SpectralDataset(X_train, y_train, augment=True, input_length=input_length)
    train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

    model = AgroSpecCNN(input_length, num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

    for epoch in range(n_epochs):
        model.train()
        total_loss, correct, total = 0, 0, 0

        for x, y in train_loader:
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += y.size(0)
            correct += predicted.eq(y).sum().item()

        print(f"Epoch {epoch+1}/{n_epochs} - Loss: {total_loss / len(train_loader):.4f}, Accuracy: {correct / total:.4f}")

    torch.save(model.state_dict(), model_path)
    return {"train_loss": total_loss / len(train_loader), "train_accuracy": correct / total}


# Evaluation loop
def CNNTest(X_test, y_test, BATCH_SIZE, input_length, num_classes, model_path):
    test_set = SpectralDataset(X_test, y_test, augment=False, input_length=input_length)
    test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

    model = AgroSpecCNN(input_length, num_classes).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    total_loss, correct, total = 0, 0, 0
    all_preds, all_targets = [], []
    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)

            outputs = model(x)
            loss = criterion(outputs, y)

            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += y.size(0)
            correct += predicted.eq(y).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_targets.extend(y.cpu().numpy())

    metrics = {
        "test_loss": total_loss / len(test_loader),
        "test_accuracy": correct / total,
        "precision": precision_score(all_targets, all_preds, average='weighted'),
        "recall": recall_score(all_targets, all_preds, average='weighted'),
        "f1": f1_score(all_targets, all_preds, average='weighted'),
        "confusion_matrix": confusion_matrix(all_targets, all_preds)
    }
    return metrics


# Combined CNN train-and-test entry point
def CNN_deepseek(X_train, X_test, y_train, y_test, BATCH_SIZE, n_epochs, input_length, num_classes, model_path):
    train_metrics = CNNTrain(X_train, y_train, BATCH_SIZE, n_epochs, input_length, num_classes, model_path)
    test_metrics = CNNTest(X_test, y_test, BATCH_SIZE, input_length, num_classes, model_path)
    return train_metrics, test_metrics
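A minimal driver for CNN_deepseek, for orientation only; the synthetic shapes and the checkpoint name are placeholders, not values from this commit:

import numpy as np

X_train = np.random.rand(200, 462).astype(np.float32)
y_train = np.random.randint(0, 21, size=200)
X_test = np.random.rand(50, 462).astype(np.float32)
y_test = np.random.randint(0, 21, size=50)

train_metrics, test_metrics = CNN_deepseek(
    X_train, X_test, y_train, y_test,
    BATCH_SIZE=32, n_epochs=5, input_length=462, num_classes=21,
    model_path="agrospec_cnn.pth")
print(test_metrics["test_accuracy"])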
309
classification_model/Classification/CNN_网格搜索.py
Normal file
@@ -0,0 +1,309 @@
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import GradScaler, autocast
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from torch.utils.tensorboard import SummaryWriter

# Device and TensorBoard writer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
writer = SummaryWriter()  # initialize TensorBoard
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'


# ---------------------------
# Dataset and preprocessing helpers
# ---------------------------
class MyDataset(Dataset):
    """
    Custom dataset with optional augmentation (additive noise during training).
    """

    def __init__(self, specs, labels, augment=False):
        self.specs = specs
        self.labels = labels
        self.augment = augment

    def __getitem__(self, index):
        spec, target = self.specs[index], self.labels[index]
        if self.augment:
            noise = 0.01 * torch.randn_like(spec)
            spec = spec + noise
        return spec, target

    def __len__(self):
        return len(self.specs)


def ZspProcess(X_train, X_test, y_train, y_test, need=True):
    """
    Standardize the data and convert it to tensors of shape (n_samples, 1, n_features).
    """
    if need:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
    X_train = torch.tensor(X_train[:, np.newaxis, :], dtype=torch.float32)
    X_test = torch.tensor(X_test[:, np.newaxis, :], dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.long)
    y_test = torch.tensor(y_test, dtype=torch.long)

    data_train = MyDataset(X_train, y_train, augment=True)
    data_test = MyDataset(X_test, y_test, augment=False)
    return data_train, data_test


# ---------------------------
# Model definition
# ---------------------------
class CNN3Layers(nn.Module):
    """
    Three-layer 1-D CNN with configurable dropout after the conv blocks
    (dropout_conv) and in the fully connected head (dropout_fc).
    """

    def __init__(self, nls, dropout_conv=0.3, dropout_fc=0.5):
        super(CNN3Layers, self).__init__()
        self.CONV1 = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Dropout(dropout_conv)
        )
        self.CONV2 = nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Dropout(dropout_conv)
        )
        self.CONV3 = nn.Sequential(
            nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.AdaptiveMaxPool1d(1),
            nn.Dropout(dropout_conv)
        )
        self.fc = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout_fc),
            nn.Linear(128, nls)
        )

    def forward(self, x):
        x = self.CONV1(x)
        x = self.CONV2(x)
        x = self.CONV3(x)
        x = x.view(x.size(0), -1)
        out = self.fc(x)
        return out


# ---------------------------
# Training and evaluation
# ---------------------------
def CNNTrain(X_train, X_test, y_train, y_test, BATCH_SIZE, n_epochs, nls, model_path, dropout_conv, dropout_fc):
    """
    Train for the given number of epochs, log train/test metrics to
    TensorBoard, and save the checkpoint with the best test accuracy.
    """
    data_train, data_test = ZspProcess(X_train, X_test, y_train, y_test, need=True)
    train_loader = DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=False)

    model = CNN3Layers(nls=nls, dropout_conv=dropout_conv, dropout_fc=dropout_fc).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
    criterion = nn.CrossEntropyLoss().to(device)
    scaler = GradScaler()

    best_acc = 0.0
    # Predictions from the best epoch (used for the confusion matrix)
    final_y_true, final_y_pred = [], []

    for epoch in range(n_epochs):
        model.train()
        train_acc_list, train_loss_list = [], []

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            _, predicted = torch.max(outputs.data, 1)
            acc = accuracy_score(labels.cpu(), predicted.cpu())
            train_acc_list.append(acc)
            train_loss_list.append(loss.item())

        avg_train_loss = np.mean(train_loss_list)
        avg_train_acc = np.mean(train_acc_list)
        writer.add_scalar('Loss/train', avg_train_loss, epoch)
        writer.add_scalar('Accuracy/train', avg_train_acc, epoch)

        # Evaluation
        model.eval()
        test_acc_list, test_loss_list = [], []
        test_precision_list, test_recall_list, test_f1_list = [], [], []
        y_true, y_pred = [], []
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                with autocast():
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                _, predicted = torch.max(outputs.data, 1)
                acc = accuracy_score(labels.cpu(), predicted.cpu())
                prec = precision_score(labels.cpu(), predicted.cpu(), average='weighted', zero_division=1)
                rec = recall_score(labels.cpu(), predicted.cpu(), average='weighted', zero_division=1)
                f1 = f1_score(labels.cpu(), predicted.cpu(), average='weighted', zero_division=1)

                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predicted.cpu().numpy())
                test_acc_list.append(acc)
                test_loss_list.append(loss.item())
                test_precision_list.append(prec)
                test_recall_list.append(rec)
                test_f1_list.append(f1)

        avg_test_loss = np.mean(test_loss_list)
        avg_test_acc = np.mean(test_acc_list)
        avg_test_precision = np.mean(test_precision_list)
        avg_test_recall = np.mean(test_recall_list)
        avg_test_f1 = np.mean(test_f1_list)

        writer.add_scalar('Loss/test', avg_test_loss, epoch)
        writer.add_scalar('Accuracy/test', avg_test_acc, epoch)
        writer.add_scalar('Precision/test', avg_test_precision, epoch)
        writer.add_scalar('Recall/test', avg_test_recall, epoch)
        writer.add_scalar('F1_Score/test', avg_test_f1, epoch)

        print(f"Epoch [{epoch + 1}/{n_epochs}]: Train Loss={avg_train_loss:.4f}, Train Acc={avg_train_acc:.4f} | "
              f"Test Loss={avg_test_loss:.4f}, Test Acc={avg_test_acc:.4f}, Precision={avg_test_precision:.4f}, "
              f"Recall={avg_test_recall:.4f}, F1={avg_test_f1:.4f}")

        # Save the checkpoint whenever test accuracy improves
        if avg_test_acc > best_acc:
            best_acc = avg_test_acc
            torch.save(model.state_dict(), model_path)
            final_y_true = y_true.copy()
            final_y_pred = y_pred.copy()

        scheduler.step(avg_test_loss)

    train_metrics = {
        "train_loss": avg_train_loss,
        "train_accuracy": avg_train_acc
    }
    test_metrics = {
        "test_loss": avg_test_loss,
        "test_accuracy": avg_test_acc,
        "precision": avg_test_precision,
        "recall": avg_test_recall,
        "f1_score": avg_test_f1,
        "confusion_matrix": confusion_matrix(final_y_true, final_y_pred)
    }
    return train_metrics, test_metrics


def CNNTest(X_test, y_test, BATCH_SIZE, nls, model_path, dropout_conv, dropout_fc):
    """
    Load the saved checkpoint and compute metrics on the test set.
    """
    # Standardize the test set only (note: this fits a new scaler on the
    # test data rather than reusing the training scaler)
    scaler = StandardScaler()
    X_test = scaler.fit_transform(X_test)
    X_test = torch.tensor(X_test[:, np.newaxis, :], dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.long)

    data_test = MyDataset(X_test, y_test, augment=False)
    test_loader = DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=False)

    model = CNN3Layers(nls=nls, dropout_conv=dropout_conv, dropout_fc=dropout_fc).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    y_true, y_pred = [], []
    test_loss_list = []
    criterion = nn.CrossEntropyLoss().to(device)

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, predicted = torch.max(outputs.data, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
            test_loss_list.append(loss.item())

    avg_loss = np.mean(test_loss_list)
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='weighted', zero_division=1)
    rec = recall_score(y_true, y_pred, average='weighted', zero_division=1)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=1)

    test_metrics = {
        "test_loss": avg_loss,
        "test_accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1_score": f1,
        "confusion_matrix": confusion_matrix(y_true, y_pred)
    }
    return test_metrics


# ---------------------------
# Random-search hyperparameter optimization
# ---------------------------
def optimize_hyperparameters(X_train, X_test, y_train, y_test, nls, n_iter=10, BATCH_SIZE=32, n_epochs=10):
    """
    Run random search for n_iter rounds: sample hyperparameters (here
    dropout_conv and dropout_fc), train and evaluate a model for each
    sample, and return the configuration with the highest test accuracy
    together with its train and test metrics.
    """
    best_test_acc = -1.0
    best_params = None
    best_train_metrics = None
    best_test_metrics = None

    for i in range(n_iter):
        # Sample hyperparameters from uniform distributions
        dropout_conv = np.random.uniform(0.2, 0.7)  # adjust the ranges as needed
        dropout_fc = np.random.uniform(0.3, 0.8)
        print(f"\nIteration {i + 1}/{n_iter}: Testing dropout_conv={dropout_conv:.4f}, dropout_fc={dropout_fc:.4f}")

        # Checkpoint path (overwritten with the best model of each run)
        model_path = "best_model.pth"

        # Train the model
        train_metrics, _ = CNNTrain(X_train, X_test, y_train, y_test, BATCH_SIZE, n_epochs, nls, model_path,
                                    dropout_conv, dropout_fc)
        # Evaluate the saved checkpoint
        test_metrics = CNNTest(X_test, y_test, BATCH_SIZE, nls, model_path, dropout_conv, dropout_fc)

        current_test_acc = test_metrics["test_accuracy"]
        print(f"Iteration {i + 1} result: Test Accuracy = {current_test_acc:.4f}")

        # Track the best configuration
        if current_test_acc > best_test_acc:
            best_test_acc = current_test_acc
            best_params = {"dropout_conv": dropout_conv, "dropout_fc": dropout_fc}
            best_train_metrics = train_metrics
            best_test_metrics = test_metrics

    return best_params, best_train_metrics, best_test_metrics
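The random search above is self-contained; a hypothetical call with synthetic data (class count and feature width are assumptions) would look like this:

import numpy as np

X_train = np.random.rand(200, 404)
y_train = np.random.randint(0, 10, size=200)
X_test = np.random.rand(50, 404)
y_test = np.random.randint(0, 10, size=50)

best_params, best_train, best_test = optimize_hyperparameters(
    X_train, X_test, y_train, y_test, nls=10, n_iter=3, BATCH_SIZE=32, n_epochs=5)
print(best_params, best_test["test_accuracy"])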
327
classification_model/Classification/ClassicCls.py
Normal file
@@ -0,0 +1,327 @@
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score
import sklearn.svm as svm
from sklearn.cross_decomposition import PLSRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
import xgboost as xgb
import lightgbm as lgb
import catboost as cb
# import torch
# from torch import nn, optim
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier


# Fix the random seed
def set_random_seed(seed=42):
    np.random.seed(seed)


set_random_seed()


# Cross-validation (with multi-core support)
def cross_validate_model(model, X, y, cv=5, n_jobs=-1):
    """
    Multi-core cross-validation.
    """
    scores = cross_val_score(model, X, y, cv=cv, n_jobs=n_jobs)
    print(f"Cross-validation accuracy: {scores.mean():.4f} ± {scores.std():.4f}")
    return scores


# Confusion matrix and classification report
def evaluate_model(y_true, y_pred, dataset_name="Test"):
    """
    Performance evaluation: prints the classification report and returns a
    dict of metrics, including the confusion matrix.
    """
    print(f"{dataset_name} Classification Report:")
    print(classification_report(y_true, y_pred))

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Return a dict of metrics, including the confusion matrix
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, average='weighted'),
        "recall": recall_score(y_true, y_pred, average='weighted'),
        "f1_score": f1_score(y_true, y_pred, average='weighted'),
        "confusion_matrix": cm
    }


# Logistic regression model
def LogisticRegressionModel(X_train, X_test, y_train, y_test, penalty='l2', C=1.0, solver='lbfgs', max_iter=200):
    """
    Logistic regression (for multi-class tasks).
    :param penalty: regularization type ('l1', 'l2', 'elasticnet', 'none')
    :param C: inverse of regularization strength (smaller C means stronger regularization)
    :param solver: optimization algorithm ('lbfgs', 'liblinear', 'saga', etc.)
    :param max_iter: maximum number of training iterations
    """

    # Use the multinomial formulation for multi-class problems
    model = LogisticRegression(penalty=penalty, C=C, solver=solver, max_iter=max_iter, multi_class='multinomial', random_state=1)

    # Cross-validation
    cross_validate_model(model, X_train, y_train)

    # Fit the model
    model.fit(X_train, y_train.ravel())

    # Train-set evaluation
    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    # Test-set evaluation
    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return train_metrics, test_metrics


# SVM model
def SVM(X_train, X_test, y_train, y_test, kernel='linear', C=1, gamma=1e-3):
    clf = svm.SVC(C=C, kernel=kernel, gamma=gamma)

    # Cross-validation
    cross_validate_model(clf, X_train, y_train)

    # Fit the model
    clf.fit(X_train, y_train.ravel())

    # Train-set evaluation
    y_train_pred = clf.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    # Test-set evaluation
    y_test_pred = clf.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return train_metrics, test_metrics


# PLS-DA model
def PLS_DA(X_train, X_test, y_train, y_test, n_components=40):
    y_train = pd.get_dummies(y_train)  # one-hot encoding
    model = PLSRegression(n_components=n_components)

    # Fit the model
    model.fit(X_train, y_train)

    # Train-set evaluation
    y_train_pred = model.predict(X_train)
    y_train_pred = np.argmax(y_train_pred, axis=1)
    train_metrics = evaluate_model(np.argmax(y_train.values, axis=1), y_train_pred, dataset_name="Train")

    # Test-set evaluation
    y_test_pred = model.predict(X_test)
    y_test_pred = np.argmax(y_test_pred, axis=1)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return train_metrics, test_metrics


# Random forest model (RF)
def RF(X_train, X_test, y_train, y_test, n_estimators=200, max_depth=15, n_jobs=-1):
    clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=1, n_jobs=n_jobs)

    # Cross-validation
    cross_validate_model(clf, X_train, y_train, n_jobs=n_jobs)

    # Fit the model
    clf.fit(X_train, y_train.ravel())

    # Train-set evaluation
    y_train_pred = clf.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    # Test-set evaluation
    y_test_pred = clf.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return train_metrics, test_metrics


# Grid-search hyperparameter optimization (placeholder; see the *_网格搜索 module)


# Neural network model (ANN), PyTorch implementation (kept disabled)
# def ANN(X_train, X_test, y_train, y_test, hidden_layer_sizes=(50, 30), max_iter=500):
#     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # detect GPU
#     X_train = torch.tensor(X_train, device=device, dtype=torch.float32)
#     X_test = torch.tensor(X_test, device=device, dtype=torch.float32)
#     y_train = torch.tensor(y_train, device=device, dtype=torch.long)
#     y_test = torch.tensor(y_test, device=device, dtype=torch.long)
#
#     # Define a simple feed-forward network
#     class SimpleNN(nn.Module):
#         def __init__(self, input_size, hidden_sizes, output_size):
#             super(SimpleNN, self).__init__()
#             self.fc1 = nn.Linear(input_size, hidden_sizes[0])
#             self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
#             self.fc3 = nn.Linear(hidden_sizes[1], output_size)
#
#         def forward(self, x):
#             x = torch.relu(self.fc1(x))
#             x = torch.relu(self.fc2(x))
#             x = self.fc3(x)
#             return x
#
#     model = SimpleNN(X_train.shape[1], hidden_layer_sizes, len(torch.unique(y_train))).to(device)
#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.Adam(model.parameters(), lr=0.001)
#
#     # Train the model
#     for epoch in range(max_iter):
#         optimizer.zero_grad()
#         outputs = model(X_train)
#         loss = criterion(outputs, y_train)
#         loss.backward()
#         optimizer.step()
#
#     # Train- and test-set evaluation
#     with torch.no_grad():
#         y_train_pred = torch.argmax(model(X_train), dim=1)
#         train_metrics = evaluate_model(y_train.cpu(), y_train_pred.cpu(), dataset_name="Train")
#
#         y_test_pred = torch.argmax(model(X_test), dim=1)
#         test_metrics = evaluate_model(y_test.cpu(), y_test_pred.cpu(), dataset_name="Test")
#
#     return train_metrics, test_metrics


# XGBoost model
def XGBoost(X_train, X_test, y_train, y_test, n_estimators=100, learning_rate=0.1, max_depth=3):
    model = xgb.XGBClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        random_state=1,
        # tree_method='gpu_hist',  # enable for GPU acceleration
        gpu_id=0
    )

    # Fit the model
    model.fit(X_train, y_train)

    # Train-set evaluation
    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    # Test-set evaluation
    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return train_metrics, test_metrics


# LightGBM model
def LightGBM(X_train, X_test, y_train, y_test, n_estimators=100, learning_rate=0.1, max_depth=-1, num_leaves=31):
    model = lgb.LGBMClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        num_leaves=num_leaves,
        random_state=1,
        # device='gpu'  # enable for GPU acceleration
    )

    # Fit the model
    model.fit(X_train, y_train)

    # Train-set evaluation
    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    # Test-set evaluation
    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return train_metrics, test_metrics


# CatBoost model
def CatBoost(X_train, X_test, y_train, y_test, iterations=500, learning_rate=0.1, depth=6):
    model = cb.CatBoostClassifier(
        iterations=iterations,
        learning_rate=learning_rate,
        depth=depth,
        random_seed=1,
        # task_type='GPU',  # enable for GPU acceleration
        verbose=0
    )

    # Fit the model
    model.fit(X_train, y_train)

    # Train-set evaluation
    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    # Test-set evaluation
    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return train_metrics, test_metrics


# AdaBoost model
def AdaBoost(X_train, X_test, y_train, y_test, n_estimators=50, learning_rate=1.0):
    """
    AdaBoost for multi-class classification.
    :param n_estimators: number of base learners (boosting iterations)
    :param learning_rate: learning rate (scales the contribution of each base learner)
    """
    # Use a decision stump as the base learner
    base_estimator = DecisionTreeClassifier(max_depth=1)

    # Build the AdaBoost model
    model = AdaBoostClassifier(
        base_estimator=base_estimator,
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        random_state=1
    )

    # Fit the model
    model.fit(X_train, y_train)

    # Train-set evaluation
    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    # Test-set evaluation
    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return train_metrics, test_metrics


def KNN(X_train, X_test, y_train, y_test, n_neighbors=5, weights='uniform', algorithm='auto'):
    """
    K-nearest neighbors model.
    :param n_neighbors: number of neighbors
    :param weights: 'uniform' or 'distance', how neighbors are weighted
    :param algorithm: 'auto', 'ball_tree', 'kd_tree', or 'brute', used to compute neighbors
    """
    # Build the KNN model
    model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm)

    # Cross-validation
    cross_validate_model(model, X_train, y_train)

    # Fit the model
    model.fit(X_train, y_train)

    # Train-set evaluation
    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    # Test-set evaluation
    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return train_metrics, test_metrics
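These classical baselines share one calling convention, so a single hypothetical example covers them all (synthetic arrays; sizes and class count are assumptions):

import numpy as np

X_train = np.random.rand(200, 404)
y_train = np.random.randint(0, 4, size=200)
X_test = np.random.rand(50, 404)
y_test = np.random.randint(0, 4, size=50)

train_metrics, test_metrics = SVM(X_train, X_test, y_train, y_test, kernel='rbf', C=10)
print(test_metrics["f1_score"])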
235
classification_model/Classification/ClassicClsHY.py
Normal file
@@ -0,0 +1,235 @@
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score
import sklearn.svm as svm
from sklearn.svm import SVC
from sklearn.cross_decomposition import PLSRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split, StratifiedKFold
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from xgboost import XGBClassifier
import lightgbm as lgb
import catboost as cb
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier


# Fix the random seed
def set_random_seed(seed=42):
    np.random.seed(seed)


set_random_seed()


# Cross-validation (with multi-core support)
def cross_validate_model(model, X, y, cv=5, n_jobs=-1):
    """
    Multi-core cross-validation.
    """
    scores = cross_val_score(model, X, y, cv=cv, n_jobs=n_jobs)
    print(f"Cross-validation accuracy: {scores.mean():.4f} ± {scores.std():.4f}")
    return scores


# Confusion matrix and classification report
def evaluate_model(y_true, y_pred, dataset_name="Test"):
    """
    Performance evaluation: prints the classification report and returns a
    dict of metrics, including the F1 score and confusion matrix.
    """
    print(f"{dataset_name} Classification Report:")
    print(classification_report(y_true, y_pred))

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, average='weighted'),
        "recall": recall_score(y_true, y_pred, average='weighted'),
        "f1_score": f1_score(y_true, y_pred, average='weighted'),
        "confusion_matrix": cm
    }


# 1. Bayesian optimization for SVM
def optimize_SVM(X_train, y_train, X_test, y_test):
    param_space = {
        'C': (0.01, 10.0, 'uniform'),
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
        'gamma': (1e-4, 1e-1, 'log-uniform')
    }

    model = SVC()
    optimizer = BayesSearchCV(model, param_space, n_iter=50, cv=5, n_jobs=-1, verbose=0, scoring='f1_weighted')  # weighted F1 as the search criterion
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_

    # Evaluate the model refit with the best hyperparameters
    best_model = optimizer.best_estimator_
    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return best_params, train_metrics, test_metrics


# 2. Bayesian optimization for KNN
def optimize_KNN(X_train, y_train, X_test, y_test):
    param_space = {
        'n_neighbors': (1, 20),
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
    }

    model = KNeighborsClassifier()
    optimizer = BayesSearchCV(model, param_space, n_iter=50, cv=5, n_jobs=-1, verbose=0, scoring='f1_weighted')  # weighted F1 as the search criterion
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_

    # Evaluate the model refit with the best hyperparameters
    best_model = optimizer.best_estimator_
    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return best_params, train_metrics, test_metrics


# 3. Bayesian optimization for XGBoost
def optimize_XGBoost(X_train, y_train, X_test, y_test):
    param_space = {
        'n_estimators': Integer(50, 500),
        'max_depth': Integer(3, 10),
        'learning_rate': Real(1e-4, 1.0, prior='log-uniform'),
        'subsample': Real(0.1, 1.0),
        'colsample_bytree': Real(0.1, 1.0)
    }

    model = XGBClassifier(tree_method='gpu_hist', gpu_id=0)
    optimizer = BayesSearchCV(model, param_space, n_iter=50, cv=5, n_jobs=-1, verbose=0, scoring='f1_weighted')  # weighted F1 as the search criterion

    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_
    best_model = optimizer.best_estimator_

    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return best_params, train_metrics, test_metrics


# 4. Bayesian optimization for Random Forest
def optimize_RF(X_train, y_train, X_test, y_test):
    param_space = {
        'n_estimators': (50, 500),
        'max_depth': (3, 15),
        'min_samples_split': (2, 20),
        'min_samples_leaf': (1, 20),
        'max_features': ['auto', 'sqrt', 'log2']
    }

    model = RandomForestClassifier(random_state=42)
    optimizer = BayesSearchCV(model, param_space, n_iter=50, cv=5, n_jobs=-1, verbose=0, scoring='f1_weighted')  # weighted F1 as the search criterion
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_

    # Evaluate the model refit with the best hyperparameters
    best_model = optimizer.best_estimator_
    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return best_params, train_metrics, test_metrics


# 5. Bayesian optimization for CatBoost
def optimize_CatBoost(X_train, y_train, X_test, y_test):
    param_space = {
        'iterations': (50, 500),
        'learning_rate': (0.01, 0.3, 'uniform'),
        'depth': (3, 10),
        'l2_leaf_reg': (1, 10, 'uniform'),
        'bagging_temperature': (0, 1, 'uniform')
    }

    model = cb.CatBoostClassifier(task_type='GPU', random_seed=42, verbose=0)
    optimizer = BayesSearchCV(model, param_space, n_iter=50, cv=5, n_jobs=-1, verbose=0, scoring='f1_weighted')  # weighted F1 as the search criterion
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_

    # Evaluate the model refit with the best hyperparameters
    best_model = optimizer.best_estimator_
    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return best_params, train_metrics, test_metrics


# 6. Bayesian optimization for Logistic Regression
def optimize_LogisticRegression(X_train, y_train, X_test, y_test):
    param_space = {
        'C': (1e-5, 1e5, 'log-uniform'),
        'penalty': ['l1', 'l2'],
        'solver': ['lbfgs', 'liblinear', 'saga']
    }

    model = LogisticRegression(multi_class='multinomial', random_state=42)
    optimizer = BayesSearchCV(model, param_space, n_iter=50, cv=5, n_jobs=-1, verbose=0, scoring='f1_weighted')  # weighted F1 as the search criterion
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_

    # Evaluate the model refit with the best hyperparameters
    best_model = optimizer.best_estimator_
    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return best_params, train_metrics, test_metrics


# 7. Bayesian optimization for a neural network (ANN)
def optimize_ANN(X_train, y_train, X_test, y_test):
    param_space = {
        'hidden_layer_sizes': [(10,), (50,), (100,), (10, 10), (50, 50)],
        'activation': ['relu', 'tanh', 'logistic'],
        'solver': ['adam', 'sgd'],
        'alpha': (1e-5, 1e-1, 'log-uniform'),
        'learning_rate': ['constant', 'invscaling', 'adaptive']
    }

    model = MLPClassifier(max_iter=500, random_state=42)
    optimizer = BayesSearchCV(model, param_space, n_iter=50, cv=5, n_jobs=-1, verbose=0, scoring='f1_weighted')  # weighted F1 as the search criterion
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_

    # Evaluate the model refit with the best hyperparameters
    best_model = optimizer.best_estimator_
    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return best_params, train_metrics, test_metrics
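All of these optimizers follow the same pattern, so one hypothetical call illustrates the API; it requires scikit-optimize to be installed, and the synthetic data is a placeholder:

import numpy as np

X_train = np.random.rand(200, 404)
y_train = np.random.randint(0, 4, size=200)
X_test = np.random.rand(50, 404)
y_test = np.random.randint(0, 4, size=50)

best_params, train_metrics, test_metrics = optimize_KNN(X_train, y_train, X_test, y_test)
print(best_params, test_metrics["accuracy"])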
306
classification_model/Classification/ClassicCls_网格搜索.py
Normal file
@@ -0,0 +1,306 @@
import numpy as np
from sklearn.metrics import f1_score, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split, StratifiedKFold
from scipy.stats import loguniform, randint
from xgboost import XGBClassifier
import lightgbm as lgb
import catboost as cb
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
import gc
import os


# Fix the random seed
def set_random_seed(seed=42):
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)


set_random_seed()


# Performance evaluation
def evaluate_model(y_true, y_pred, dataset_name="Test"):
    print(f"\n{dataset_name} Classification Report:")
    print(classification_report(y_true, y_pred))

    cm = confusion_matrix(y_true, y_pred)
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, average='weighted'),
        "recall": recall_score(y_true, y_pred, average='weighted'),
        "f1_score": f1_score(y_true, y_pred, average='weighted'),
        "confusion_matrix": cm
    }


# XGBoost optimization
def optimize_XGBoost(X_train, y_train, X_test, y_test):
    param_dist = {
        'max_depth': randint(3, 10),             # maximum tree depth
        'learning_rate': loguniform(1e-3, 0.2),  # contribution of each tree to the final result
        'subsample': [0.6, 0.8, 1.0],            # sample fraction per boosting round
        'colsample_bytree': [0.6, 0.8, 1.0],     # feature fraction per tree
        'n_estimators': randint(100, 300),       # number of trees
        'min_child_weight': randint(1, 10),      # minimum sum of instance weight in a leaf
        'gamma': [0, 0.1, 0.2]                   # minimum loss reduction to split a node
    }

    model = XGBClassifier(
        tree_method='gpu_hist',
        gpu_id=0,
        use_label_encoder=False,
        eval_metric='mlogloss',
        objective='multi:softmax',
        num_class=len(np.unique(y_train))
    )

    optimizer = RandomizedSearchCV(
        model,
        param_distributions=param_dist,
        n_iter=30,
        cv=StratifiedKFold(n_splits=3),
        scoring='f1_weighted',
        n_jobs=-1,
        verbose=1
    )
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_
    print(f"Best XGBoost Hyperparameters: {best_params}")
    best_model = optimizer.best_estimator_

    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, "Train")
    test_metrics = evaluate_model(y_test, y_test_pred, "Test")

    gc.collect()
    return best_params, train_metrics, test_metrics


# LightGBM optimization
def optimize_LightGBM(X_train, y_train, X_test, y_test):
    param_dist = {
        'num_leaves': randint(20, 50),           # controls tree complexity
        'learning_rate': loguniform(1e-3, 0.2),  # contribution of each tree to the final result
        'subsample': [0.6, 0.8, 1.0],            # sample fraction per iteration
        'colsample_bytree': [0.6, 0.8, 1.0],     # feature fraction per tree
        'n_estimators': randint(100, 300),       # number of trees
        'min_child_samples': randint(10, 100),   # minimum samples per leaf
        'max_depth': [None, 3, 5, 7]             # maximum tree depth
    }

    model = lgb.LGBMClassifier(
        device_type='gpu',
        objective='multiclass',
        num_class=len(np.unique(y_train))
    )

    optimizer = RandomizedSearchCV(
        model,
        param_distributions=param_dist,
        n_iter=30,
        cv=StratifiedKFold(n_splits=3),
        scoring='f1_weighted',
        n_jobs=-1,
        verbose=1
    )
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_
    print(f"Best LightGBM Hyperparameters: {best_params}")
    best_model = optimizer.best_estimator_

    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, "Train")
    test_metrics = evaluate_model(y_test, y_test_pred, "Test")

    gc.collect()
    return best_params, train_metrics, test_metrics


# CatBoost optimization
def optimize_CatBoost(X_train, y_train, X_test, y_test):
    param_dist = {
        'depth': randint(4, 8),                  # tree depth
        'learning_rate': loguniform(1e-3, 0.2),  # contribution of each tree to the final result
        'l2_leaf_reg': randint(1, 10),           # L2 regularization coefficient
        'iterations': randint(100, 300),         # number of trees
        'border_count': [32, 64, 128]            # number of split candidates
    }

    model = cb.CatBoostClassifier(
        task_type='GPU',
        verbose=0,
        loss_function='MultiClass'
    )

    optimizer = RandomizedSearchCV(
        model,
        param_distributions=param_dist,
        n_iter=30,
        cv=StratifiedKFold(n_splits=3),
        scoring='f1_weighted',
        n_jobs=-1,
        verbose=1
    )
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_
    print(f"Best CatBoost Hyperparameters: {best_params}")
    best_model = optimizer.best_estimator_

    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, "Train")
    test_metrics = evaluate_model(y_test, y_test_pred, "Test")

    gc.collect()
    return best_params, train_metrics, test_metrics


# SVM optimization
def optimize_SVM(X_train, y_train, X_test, y_test):
    param_dist = {
        'C': loguniform(1e-2, 10),       # penalty parameter
        'kernel': ['linear', 'rbf'],     # kernel function
        'gamma': loguniform(1e-4, 1e-1)  # kernel coefficient
    }

    model = SVC(probability=True)

    optimizer = RandomizedSearchCV(
        model,
        param_distributions=param_dist,
        n_iter=30,
        cv=StratifiedKFold(n_splits=3),
        scoring='f1_weighted',
        n_jobs=-1,
        verbose=1
    )
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_
    print(f"Best SVM Hyperparameters: {best_params}")
    best_model = optimizer.best_estimator_

    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, "Train")
    test_metrics = evaluate_model(y_test, y_test_pred, "Test")

    return best_params, train_metrics, test_metrics


# KNN optimization
def optimize_KNN(X_train, y_train, X_test, y_test):
    param_grid = {
        'n_neighbors': list(range(3, 20, 2)),  # number of neighbors
        'weights': ['uniform', 'distance'],    # weight function
        'p': [1, 2]                            # distance metric (1 = Manhattan, 2 = Euclidean)
    }

    model = KNeighborsClassifier(algorithm='brute')

    optimizer = GridSearchCV(
        model,
        param_grid=param_grid,
        cv=StratifiedKFold(n_splits=3),
        scoring='f1_weighted',
        n_jobs=-1,
        verbose=1
    )
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_
    print(f"Best KNN Hyperparameters: {best_params}")
    best_model = optimizer.best_estimator_

    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, "Train")
    test_metrics = evaluate_model(y_test, y_test_pred, "Test")

    return best_params, train_metrics, test_metrics


# Logistic Regression optimization
def optimize_LogisticRegression(X_train, y_train, X_test, y_test):
    param_grid = {
        'C': loguniform(1e-4, 1e2),         # regularization strength
        'penalty': ['l2', None],            # regularization type
        'solver': ['lbfgs', 'sag', 'saga']  # optimization algorithm
    }

    model = LogisticRegression(max_iter=1000, random_state=42)

    optimizer = RandomizedSearchCV(
        model,
        param_distributions=param_grid,
        n_iter=30,
        cv=StratifiedKFold(n_splits=3),
        scoring='f1_weighted',
        n_jobs=-1,
        verbose=1
    )
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_
    print(f"Best Logistic Regression Hyperparameters: {best_params}")
    best_model = optimizer.best_estimator_

    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, "Train")
    test_metrics = evaluate_model(y_test, y_test_pred, "Test")

    return best_params, train_metrics, test_metrics


# Random Forest optimization
def optimize_RF(X_train, y_train, X_test, y_test):
    param_dist = {
        'n_estimators': randint(100, 300),    # number of trees
        'max_depth': [None, 3, 5, 7],         # maximum tree depth
        'min_samples_split': randint(2, 10),  # minimum samples to split an internal node
        'min_samples_leaf': randint(1, 10),   # minimum samples per leaf
        'bootstrap': [True, False],           # whether to use bootstrap sampling
        'criterion': ['gini', 'entropy']      # split criterion
    }

    model = RandomForestClassifier(random_state=42)

    optimizer = RandomizedSearchCV(
        model,
        param_distributions=param_dist,
        n_iter=30,
        cv=StratifiedKFold(n_splits=3),
        scoring='f1_weighted',
        n_jobs=-1,
        verbose=1
    )
    optimizer.fit(X_train, y_train)

    best_params = optimizer.best_params_
    print(f"Best Random Forest Hyperparameters: {best_params}")
    best_model = optimizer.best_estimator_

    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    train_metrics = evaluate_model(y_train, y_train_pred, "Train")
    test_metrics = evaluate_model(y_test, y_test_pred, "Test")

    return best_params, train_metrics, test_metrics
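Since RandomizedSearchCV samples from scipy.stats distributions, these searches also run on CPU-only machines once the GPU flags are removed; a hypothetical call with synthetic placeholder data:

import numpy as np

X_train = np.random.rand(200, 404)
y_train = np.random.randint(0, 4, size=200)
X_test = np.random.rand(50, 404)
y_test = np.random.randint(0, 4, size=50)

best_params, train_metrics, test_metrics = optimize_RF(X_train, y_train, X_test, y_test)
print(best_params, test_metrics["f1_score"])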
49
classification_model/Classification/Cls.py
Normal file
@@ -0,0 +1,49 @@
from classification_model.Classification.ClassicCls import SVM, PLS_DA, RF, XGBoost, LightGBM, CatBoost, LogisticRegressionModel, AdaBoost, KNN
# from Classification.CNN import CNN
# from Classification.CNN_Transfomer import TransformerTrainAndTest
# from Classification.CNN_SAE import SAETrainAndTest
# from Classification.SAE import SAE
# from Classification.CNN_deepseek import CNN_deepseek
from multiprocessing import Pool, cpu_count


def QualitativeAnalysis(model, X_train, X_test, y_train, y_test, n_jobs=-1):
    """
    Dispatch to the requested classification model and return the train-set
    and test-set evaluation metrics.

    Parameters:
    - model: name of the classification model to use
    - X_train, X_test: training and test features
    - y_train, y_test: training and test labels
    - n_jobs: number of cores, for models that support multi-threading

    Returns:
    - train_metrics: dict with train-set accuracy, precision, recall, f1_score
    - test_metrics: dict with test-set accuracy, precision, recall, f1_score
    """

    if model == "PLS_DA":
        train_metrics, test_metrics = PLS_DA(X_train, X_test, y_train, y_test)
    elif model == "ANN":
        # NOTE: ANN is not imported above; this branch needs an ANN implementation to be enabled.
        train_metrics, test_metrics = ANN(X_train, X_test, y_train, y_test)
    elif model == "SVM":
        train_metrics, test_metrics = SVM(X_train, X_test, y_train, y_test)
    elif model == "RF":
        train_metrics, test_metrics = RF(X_train, X_test, y_train, y_test, n_jobs=n_jobs)
    elif model == "LogisticRegression":
        train_metrics, test_metrics = LogisticRegressionModel(X_train, X_test, y_train, y_test, penalty='l2', C=1.0, solver='lbfgs')
    elif model == "XGBoost":
        train_metrics, test_metrics = XGBoost(X_train, X_test, y_train, y_test, n_estimators=100, learning_rate=0.1, max_depth=3)
    elif model == "LightGBM":
        train_metrics, test_metrics = LightGBM(X_train, X_test, y_train, y_test, n_estimators=100, learning_rate=0.1, max_depth=-1, num_leaves=31)
    elif model == "CatBoost":
        train_metrics, test_metrics = CatBoost(X_train, X_test, y_train, y_test, iterations=500, learning_rate=0.1, depth=6)
    elif model == "AdaBoost":
        train_metrics, test_metrics = AdaBoost(X_train, X_test, y_train, y_test, n_estimators=50, learning_rate=1.0)
    elif model == 'KNN':
        train_metrics, test_metrics = KNN(X_train, X_test, y_train, y_test, n_neighbors=5)
    else:
        print("No such model for Qualitative Analysis")
        return None, None

    return train_metrics, test_metrics
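A hypothetical call of this dispatcher, mirroring how the other entry points are used (synthetic data; sizes are assumptions):

import numpy as np

X_train = np.random.rand(200, 404)
y_train = np.random.randint(0, 4, size=200)
X_test = np.random.rand(50, 404)
y_test = np.random.randint(0, 4, size=50)

train_metrics, test_metrics = QualitativeAnalysis("RF", X_train, X_test, y_train, y_test)
print(test_metrics["accuracy"])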
47
classification_model/Classification/Cls_网格搜索.py
Normal file
@@ -0,0 +1,47 @@
# from Classification.CNN_HYper import
from classification_model.Classification.CNN_Transfomer import TransformerTrainAndTest
from classification_model.Classification.CNN_SAE import SAETrainAndTest
from classification_model.Classification.SAE import SAE
from classification_model.Classification.CNN_deepseek import CNN_deepseek
from multiprocessing import Pool, cpu_count

# Hyperparameter-search model entry points (grid/random search)
from classification_model.Classification.ClassicCls_网格搜索 import optimize_SVM, optimize_KNN, optimize_XGBoost, optimize_RF, optimize_CatBoost, optimize_LogisticRegression


def QualitativeAnalysis(model, X_train, X_test, y_train, y_test, n_jobs=-1):
    """
    Dispatch to the requested classification model and return the train-set
    and test-set evaluation metrics.

    Parameters:
    - model: name of the classification model to use
    - X_train, X_test: training and test features
    - y_train, y_test: training and test labels
    - n_jobs: number of cores, for models that support multi-threading

    Returns:
    - best_params: best hyperparameters found by the search
    - train_metrics: dict with train-set accuracy, precision, recall, f1_score
    - test_metrics: dict with test-set accuracy, precision, recall, f1_score
    """

    if model == "SVM":
        best_params, train_metrics, test_metrics = optimize_SVM(X_train, y_train, X_test, y_test)
    elif model == "RF":
        best_params, train_metrics, test_metrics = optimize_RF(X_train, y_train, X_test, y_test)
    # elif model == "optimize_CNN":
    #     best_params, train_metrics, test_metrics = optimize_hyperparameters(X_train, X_test, y_train, y_test, nls=10, n_iter=10)
    elif model == "LogisticRegression":
        best_params, train_metrics, test_metrics = optimize_LogisticRegression(X_train, y_train, X_test, y_test)
    elif model == "XGBoost":
        best_params, train_metrics, test_metrics = optimize_XGBoost(X_train, y_train, X_test, y_test)
    elif model == "CatBoost":
        best_params, train_metrics, test_metrics = optimize_CatBoost(X_train, y_train, X_test, y_test)
    elif model == 'KNN':
        best_params, train_metrics, test_metrics = optimize_KNN(X_train, y_train, X_test, y_test)
    else:
        print("No such model for Qualitative Analysis")
        return None, None, None

    return best_params, train_metrics, test_metrics
48
classification_model/Classification/Cls_超参数.py
Normal file
@@ -0,0 +1,48 @@
from classification_model.Classification.CNN_HYper import optimize_CNN
from classification_model.Classification.CNN_Transfomer import TransformerTrainAndTest
from classification_model.Classification.CNN_SAE import SAETrainAndTest
from classification_model.Classification.SAE import SAE
from classification_model.Classification.CNN_deepseek import CNN_deepseek
from multiprocessing import Pool, cpu_count

# Bayesian-optimization model entry points
from classification_model.Classification.ClassicClsHY import optimize_SVM, optimize_KNN, optimize_XGBoost, optimize_RF, optimize_CatBoost, optimize_LogisticRegression, optimize_ANN


def QualitativeAnalysis(model, X_train, X_test, y_train, y_test, n_jobs=-1):
    """
    Dispatch to the requested classification model and return the train-set
    and test-set evaluation metrics.

    Parameters:
    - model: name of the classification model to use
    - X_train, X_test: training and test features
    - y_train, y_test: training and test labels
    - n_jobs: number of cores, for models that support multi-threading

    Returns:
    - best_params: best hyperparameters found by the search
    - train_metrics: dict with train-set accuracy, precision, recall, f1_score
    - test_metrics: dict with test-set accuracy, precision, recall, f1_score
    """

    if model == "ANN":
        best_params, train_metrics, test_metrics = optimize_ANN(X_train, y_train, X_test, y_test)
    elif model == "SVM":
        best_params, train_metrics, test_metrics = optimize_SVM(X_train, y_train, X_test, y_test)
    elif model == "RF":
        best_params, train_metrics, test_metrics = optimize_RF(X_train, y_train, X_test, y_test)
    elif model == "optimize_CNN":
        best_params, train_metrics, test_metrics = optimize_CNN(X_train, X_test, y_train, y_test, model_path=r'H:\arithmetic\python\opensa-main(local)\opensa-main\OpenSA\tensorboard_logs\model_best.pth')
    elif model == "LogisticRegression":
        best_params, train_metrics, test_metrics = optimize_LogisticRegression(X_train, y_train, X_test, y_test)
    elif model == "XGBoost":
        best_params, train_metrics, test_metrics = optimize_XGBoost(X_train, y_train, X_test, y_test)
    elif model == "CatBoost":
        best_params, train_metrics, test_metrics = optimize_CatBoost(X_train, y_train, X_test, y_test)
    elif model == 'KNN':
        best_params, train_metrics, test_metrics = optimize_KNN(X_train, y_train, X_test, y_test)
    else:
        print("No such model for Qualitative Analysis")
        return None, None, None

    return best_params, train_metrics, test_metrics
11
classification_model/Classification/DeepCls.py
Normal file
@@ -0,0 +1,11 @@
"""
-*- coding: utf-8 -*-
@Time   : 2022/04/12 17:10
@Author : Pengyou FU
@blogs  : https://blog.csdn.net/Echo_Code?spm=1000.2115.3001.5343
@github : https://github.com/FuSiry/OpenSA
@WeChat : Fu_siry
@License: Apache-2.0 license

"""
190
classification_model/Classification/SAE.py
Normal file
@@ -0,0 +1,190 @@
import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
import torch.utils.data as data
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class MyDataset(data.Dataset):
    def __init__(self, specs, labels):
        self.specs = specs
        self.labels = labels

    def __getitem__(self, index):
        spec, target = self.specs[index], self.labels[index]
        return spec, target

    def __len__(self):
        return len(self.specs)


class AutoEncoder(nn.Module):
    def __init__(self, inputDim, hiddenDim):
        super().__init__()
        self.inputDim = inputDim
        self.hiddenDim = hiddenDim
        self.encoder = nn.Linear(inputDim, hiddenDim, bias=True)
        self.decoder = nn.Linear(hiddenDim, inputDim, bias=True)
        self.act = F.relu

    def forward(self, x, rep=False):
        hidden = self.act(self.encoder(x))
        if rep:
            # Return the hidden representation for layer-wise stacking
            return hidden
        return self.decoder(hidden)


# Renamed from `SAE` so the class is no longer shadowed by the module-level
# wrapper function SAE() defined at the bottom of this file.
class SAEModel(nn.Module):
    def __init__(self, encoderList, output_dim):
        super().__init__()
        self.encoderList = encoderList
        self.en1 = encoderList[0]
        self.en2 = encoderList[1]
        self.fc = nn.Linear(128, output_dim, bias=True)  # classification head; output dim = num_classes

    def forward(self, x):
        out = self.en1(x, rep=True)
        out = self.en2(out, rep=True)
        return self.fc(out)


class SAE_net(object):
    def __init__(self, AE_epoch=200, SAE_epoch=200,
                 input_dim=404, hidden1_dim=512,
                 hidden2_dim=128, output_dim=4,  # 4 classes by default; pass num_classes when calling
                 batch_size=128):
        self.AE_epoch = AE_epoch
        self.SAE_epoch = SAE_epoch
        self.input_dim = input_dim
        self.hidden1_dim = hidden1_dim
        self.hidden2_dim = hidden2_dim
        self.output_dim = output_dim
        self.batch_size = batch_size
        self.train_loader = None

        encoder1 = AutoEncoder(self.input_dim, self.hidden1_dim)
        encoder2 = AutoEncoder(self.hidden1_dim, self.hidden2_dim)
        self.encoder_list = [encoder1, encoder2]

    def trainAE(self, x_train, y_train, encoderList, trainLayer, batchSize, epoch, useCuda=False):
        # Greedy layer-wise pre-training: only encoderList[trainLayer] is optimized,
        # the lower layers act as fixed feature extractors.
        if useCuda:
            for encoder in encoderList:
                encoder.to(device)

        optimizer = optim.Adam(encoderList[trainLayer].parameters())
        criterion = nn.MSELoss()

        data_train = MyDataset(x_train, y_train)
        self.train_loader = torch.utils.data.DataLoader(data_train, batch_size=batchSize, shuffle=True)

        for _ in range(epoch):
            for batch_idx, (x, target) in enumerate(self.train_loader):
                optimizer.zero_grad()
                x = x.float().view(x.size(0), -1)
                if useCuda:
                    x = x.to(device)

                out = x
                # Push the batch through the already-trained lower layers first
                for i in range(trainLayer):
                    out = encoderList[i](out, rep=True)

                pred = encoderList[trainLayer](out, rep=False)
                loss = criterion(pred, out)  # reconstruction loss of the current layer
                loss.backward()
                optimizer.step()

    def trainClassifier(self, model, epoch, useCuda=False):
        if useCuda:
            model = model.to(device)

        for param in model.parameters():
            param.requires_grad = True  # fine-tune the whole stack, not just the head

        optimizer = optim.Adam(model.parameters())
        criterion = nn.CrossEntropyLoss()

        for _ in range(epoch):
            for batch_idx, (x, target) in enumerate(self.train_loader):
                optimizer.zero_grad()
                x = x.float().view(-1, self.input_dim)
                target = target.long()  # CrossEntropyLoss expects integer class indices
                if useCuda:
                    x, target = x.to(device), target.to(device)

                out = model(x)
                loss = criterion(out, target)
                loss.backward()
                optimizer.step()
        self.model = model

        # Evaluate on the training set so fit() can report training metrics
        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for x, target in self.train_loader:
                x = x.float().view(-1, self.input_dim)
                if useCuda:
                    x = x.to(device)
                pred = model(x).argmax(dim=1).cpu()
                y_pred.extend(pred.tolist())
                y_true.extend(target.long().tolist())
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, average='weighted')
        recall = recall_score(y_true, y_pred, average='weighted')
        f1 = f1_score(y_true, y_pred, average='weighted')
        cm = confusion_matrix(y_true, y_pred)
        return accuracy, precision, recall, f1, cm

    def fit(self, x_train=None, y_train=None, X_test=None, y_test=None):
        x_train = x_train[:, np.newaxis, :]
        x_train = torch.from_numpy(x_train).float()

        # Pre-train the two autoencoder layers one after the other
        for i in range(2):
            self.trainAE(x_train=x_train, y_train=y_train,
                         encoderList=self.encoder_list, trainLayer=i,
                         batchSize=self.batch_size, epoch=self.AE_epoch)

        model = SAEModel(encoderList=self.encoder_list, output_dim=self.output_dim)

        # Fine-tune the classifier and collect the training-set metrics
        train_accuracy, train_precision, train_recall, train_f1, train_cm = self.trainClassifier(
            model=model, epoch=self.SAE_epoch)

        # Metrics on the held-out test set
        test_accuracy, test_precision, test_recall, test_f1, test_cm = self.evaluate(model, X_test, y_test)

        # Return the evaluation results for both sets
        train_metrics = {
            "accuracy": train_accuracy,
            "precision": train_precision,
            "recall": train_recall,
            "f1_score": train_f1,
            "confusion_matrix": train_cm
        }

        test_metrics = {
            "accuracy": test_accuracy,
            "precision": test_precision,
            "recall": test_recall,
            "f1_score": test_f1,
            "confusion_matrix": test_cm
        }

        return train_metrics, test_metrics

    def evaluate(self, model, X_test, y_test):
        X_test = torch.from_numpy(X_test).float()
        X_test = X_test[:, np.newaxis, :].view(-1, self.input_dim)

        model.eval()
        with torch.no_grad():
            out = model(X_test)
        _, y_pred = torch.max(out, 1)

        # Accuracy, precision, recall, F1 score and confusion matrix
        accuracy = accuracy_score(y_test, y_pred.numpy())
        precision = precision_score(y_test, y_pred.numpy(), average='weighted')
        recall = recall_score(y_test, y_pred.numpy(), average='weighted')
        f1 = f1_score(y_test, y_pred.numpy(), average='weighted')
        cm = confusion_matrix(y_test, y_pred.numpy())

        return accuracy, precision, recall, f1, cm


def SAE(X_train, y_train, X_test, y_test, num_classes=4):
    clf = SAE_net(output_dim=num_classes)
    train_metrics, test_metrics = clf.fit(X_train, y_train, X_test, y_test)

    return train_metrics, test_metrics
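A hedged usage sketch for the SAE entry point above; the random arrays are illustrative stand-ins for real spectra and must match the default input_dim of 404 and output_dim of 4:

import numpy as np
from classification_model.Classification.SAE import SAE

X = np.random.rand(120, 404)            # 120 spectra with 404 bands (matches input_dim=404)
y = np.random.randint(0, 4, size=120)   # 4 classes (matches the default output_dim)

train_metrics, test_metrics = SAE(X[:100], y[:100], X[100:], y[100:], num_classes=4)
print(test_metrics["accuracy"])
print(test_metrics["confusion_matrix"])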
Binary file not shown.
220
classification_model/DataLoad/DataLoad.py
Normal file
@ -0,0 +1,220 @@
"""
-*- coding: utf-8 -*-
@Time :2022/04/12 17:10
@Author : Pengyou FU
@blogs : https://blog.csdn.net/Echo_Code?spm=1000.2115.3001.5343
@github : https://github.com/FuSiry/OpenSA
@WeChat : Fu_siry
@License:Apache-2.0 license

"""


from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd


# Randomly split the dataset
def random(data, label, test_ratio=0.2, random_state=123):
    """
    :param data: shape (n_samples, n_features)
    :param label: shape (n_samples, )
    :param test_ratio: the ratio of the test set, default: 0.2
    :param random_state: the random seed, default: 123
    :return: X_train: (n_samples, n_features)
             X_test: (n_samples, n_features)
             y_train: (n_samples, )
             y_test: (n_samples, )
    """

    X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=test_ratio, random_state=random_state)  # ,stratify=label

    return X_train, X_test, y_train, y_test


# Split the dataset with the SPXY algorithm (joint x-y distances)
def spxy(data, label, test_size=0.2):
    """
    :param data: shape (n_samples, n_features)
    :param label: shape (n_samples, )
    :param test_size: the ratio of the test set, default: 0.2
    :return: X_train: (n_samples, n_features)
             X_test: (n_samples, n_features)
             y_train: (n_samples, )
             y_test: (n_samples, )
    """
    # Make sure data and label are NumPy arrays
    data = data.to_numpy() if isinstance(data, pd.DataFrame) else data
    label = label.to_numpy() if isinstance(label, pd.Series) else label

    # Keep the original data and labels
    x_backup = data
    y_backup = label

    M = data.shape[0]
    N = round((1 - test_size) * M)
    samples = np.arange(M)

    # Standardize the labels
    label = (label - np.mean(label)) / np.std(label)
    D = np.zeros((M, M))
    Dy = np.zeros((M, M))

    # Pairwise distances between samples, in x and in y
    for i in range(M - 1):
        xa = data[i, :]
        ya = label[i]
        for j in range((i + 1), M):
            xb = data[j, :]
            yb = label[j]
            D[i, j] = np.linalg.norm(xa - xb)
            Dy[i, j] = np.linalg.norm(ya - yb)

    # Normalize and combine the two distance matrices
    Dmax = np.max(D)
    Dymax = np.max(Dy)
    D = D / Dmax + Dy / Dymax

    # Seed with the two most distant samples
    maxD = D.max(axis=0)
    index_row = D.argmax(axis=0)
    index_column = maxD.argmax()

    m = np.zeros(N, dtype=int)
    m[0] = index_row[index_column]
    m[1] = index_column

    dminmax = np.zeros(N)
    dminmax[1] = D[m[0], m[1]]

    # Greedily add the sample farthest from the already-selected training set
    for i in range(2, N):
        pool = np.delete(samples, m[:i])
        dmin = np.zeros(M - i)
        for j in range(M - i):
            indexa = pool[j]
            d = np.zeros(i)
            for k in range(i):
                indexb = m[k]
                if indexa < indexb:
                    d[k] = D[indexa, indexb]
                else:
                    d[k] = D[indexb, indexa]
            dmin[j] = np.min(d)
        dminmax[i] = np.max(dmin)
        index = np.argmax(dmin)
        m[i] = pool[index]

    m_complement = np.delete(samples, m)

    # Split into training and test sets
    X_train = data[m, :]
    y_train = y_backup[m]
    X_test = data[m_complement, :]
    y_test = y_backup[m_complement]

    return X_train, X_test, y_train, y_test


# Split the dataset with the Kennard-Stone algorithm
def ks(data, label, test_size=0.2):
    """
    :param data: shape (n_samples, n_features)
    :param label: shape (n_samples, )
    :param test_size: the ratio of the test set, default: 0.2
    :return: X_train: (n_samples, n_features)
             X_test: (n_samples, n_features)
             y_train: (n_samples, )
             y_test: (n_samples, )
    """
    # Make sure data and label are NumPy arrays
    data = data.to_numpy() if isinstance(data, pd.DataFrame) else data
    label = label.to_numpy() if isinstance(label, pd.Series) else label

    M = data.shape[0]
    N = round((1 - test_size) * M)
    samples = np.arange(M)

    D = np.zeros((M, M))

    for i in range(M - 1):
        xa = data[i, :]
        for j in range((i + 1), M):
            xb = data[j, :]
            D[i, j] = np.linalg.norm(xa - xb)

    maxD = np.max(D, axis=0)
    index_row = np.argmax(D, axis=0)
    index_column = np.argmax(maxD)

    m = np.zeros(N, dtype=int)
    m[0] = index_row[index_column]
    m[1] = index_column
    dminmax = np.zeros(N)
    dminmax[1] = D[m[0], m[1]]

    for i in range(2, N):
        pool = np.delete(samples, m[:i])
        dmin = np.zeros(M - i)
        for j in range(M - i):
            indexa = pool[j]
            d = np.zeros(i)
            for k in range(i):
                indexb = m[k]
                if indexa < indexb:
                    d[k] = D[indexa, indexb]
                else:
                    d[k] = D[indexb, indexa]
            dmin[j] = np.min(d)
        dminmax[i] = np.max(dmin)
        index = np.argmax(dmin)
        m[i] = pool[index]

    m_complement = np.delete(np.arange(data.shape[0]), m)

    X_train = data[m, :]
    y_train = label[m]
    X_test = data[m_complement, :]
    y_test = label[m_complement]

    return X_train, X_test, y_train, y_test


# One public regression dataset and one public classification dataset are used as examples
def LoadNirtest(type):

    if type == "Rgs":
        CDataPath1 = r'G:\UAV\dazhou\20m\新,无条带\output.csv'
        data1 = np.loadtxt(open(CDataPath1, 'rb'), dtype=np.float64, delimiter=',', skiprows=0)

        data = data1[:, 2:]
        label = data1[:, 0]

    elif type == "Cls":
        path = r"G:\danzhu_test\rgb_refine\reflence\yellow_green_deepgreen\sum.csv"
        Nirdata = np.loadtxt(open(path, 'rb'), dtype=np.float64, delimiter=',', skiprows=0)
        data = Nirdata[1:, 1:463]
        label = Nirdata[1:, 0]

    return data, label


def SetSplit(method, data, label, test_size=0.3, randomseed=123):

    """
    :param method: the method used to split the train and test sets: random, kennard-stone (ks), or spxy
    :param data: shape (n_samples, n_features)
    :param label: shape (n_samples, )
    :param test_size: the ratio of the test set, default: 0.3
    :return: X_train: (n_samples, n_features)
             X_test: (n_samples, n_features)
             y_train: (n_samples, )
             y_test: (n_samples, )
    """

    if method == "random":
        X_train, X_test, y_train, y_test = random(data, label, test_size, randomseed)
    elif method == "spxy":
        X_train, X_test, y_train, y_test = spxy(data, label, test_size)
    elif method == "ks":
        X_train, X_test, y_train, y_test = ks(data, label, test_size)
    else:
        # Fail loudly instead of returning undefined names
        raise ValueError("No such dataset-splitting method: " + str(method))

    return X_train, X_test, y_train, y_test
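A short, hedged sanity check for the splitters above; the synthetic array sizes are illustrative, not from the source:

import numpy as np

data = np.random.rand(50, 10)
label = np.random.randint(0, 3, size=50)

for method in ("random", "ks", "spxy"):
    X_train, X_test, y_train, y_test = SetSplit(method, data, label, test_size=0.2)
    # Every splitter keeps round(0.8 * 50) = 40 samples for training
    assert X_train.shape == (40, 10) and X_test.shape == (10, 10)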
Binary file not shown.
36
classification_model/Evaluate/RgsEvaluate.py
Normal file
@ -0,0 +1,36 @@
"""
-*- coding: utf-8 -*-
@Time :2022/04/12 17:10
@Author : Pengyou FU
@blogs : https://blog.csdn.net/Echo_Code?spm=1000.2115.3001.5343
@github : https://github.com/FuSiry/OpenSA
@WeChat : Fu_siry
@License:Apache-2.0 license

"""

from sklearn.preprocessing import scale, MinMaxScaler, Normalizer, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.neural_network import MLPRegressor
import numpy as np


def ModelRgsevaluate(y_pred, y_true):

    mse = mean_squared_error(y_true, y_pred)
    R2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)

    return np.sqrt(mse), R2, mae


def ModelRgsevaluatePro(y_pred, y_true, yscale):

    yscaler = yscale
    y_true = yscaler.inverse_transform(y_true)
    y_pred = yscaler.inverse_transform(y_pred)

    mse = mean_squared_error(y_true, y_pred)
    R2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)

    return np.sqrt(mse), R2, mae
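A hedged usage sketch for ModelRgsevaluatePro above: the same fitted scaler that standardized y during training is passed back in, so RMSE and MAE come out in the original units (the numbers here are illustrative):

import numpy as np
from sklearn.preprocessing import StandardScaler

y = np.array([[1.0], [2.0], [3.0], [4.0]])   # targets in original units
scaler = StandardScaler().fit(y)
y_scaled = scaler.transform(y)
y_pred_scaled = y_scaled + 0.05              # stand-in for model predictions

rmse, R2, mae = ModelRgsevaluatePro(y_pred_scaled, y_scaled, scaler)
print(rmse, R2, mae)                         # metrics back in original units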
Binary file not shown.
@ -0,0 +1,504 @@
{
"meta":{"test_sets":[],"test_metrics":[],"learn_metrics":[{"best_value":"Min","name":"Logloss"}],"launch_mode":"Train","parameters":"","iteration_count":500,"learn_sets":["learn"],"name":"experiment"},
"iterations":[
{"learn":[0.6147067235],"iteration":0,"passed_time":0.09385870875,"remaining_time":46.83549567},
[... CatBoost training-progress log: one JSON record per boosting iteration (learn Logloss, passed_time, remaining_time), with the Logloss falling steadily from 0.615; the section cuts off at iteration 431 ...]
{"learn":[0.002267666278],"iteration":431,"passed_time":2.754928003,"remaining_time":0.4336460745},
|
||||
{"learn":[0.002267630041],"iteration":432,"passed_time":2.759860984,"remaining_time":0.427045464},
|
||||
{"learn":[0.002267621423],"iteration":433,"passed_time":2.76452767,"remaining_time":0.420412042},
|
||||
{"learn":[0.002256095575],"iteration":434,"passed_time":2.769675107,"remaining_time":0.4138594988},
|
||||
{"learn":[0.002256036799],"iteration":435,"passed_time":2.774551396,"remaining_time":0.4072735994},
|
||||
{"learn":[0.002256019564],"iteration":436,"passed_time":2.779481664,"remaining_time":0.4007033062},
|
||||
{"learn":[0.002255996584],"iteration":437,"passed_time":2.784987467,"remaining_time":0.3942219702},
|
||||
{"learn":[0.002255975151],"iteration":438,"passed_time":2.790929181,"remaining_time":0.3878056493},
|
||||
{"learn":[0.002255944216],"iteration":439,"passed_time":2.796479451,"remaining_time":0.381338107},
|
||||
{"learn":[0.002255927202],"iteration":440,"passed_time":2.802309467,"remaining_time":0.3749121509},
|
||||
{"learn":[0.002255898256],"iteration":441,"passed_time":2.808363735,"remaining_time":0.3685183182},
|
||||
{"learn":[0.002255898256],"iteration":442,"passed_time":2.814161426,"remaining_time":0.3620930051},
|
||||
{"learn":[0.002255898256],"iteration":443,"passed_time":2.820793122,"remaining_time":0.355775709},
|
||||
{"learn":[0.002255898256],"iteration":444,"passed_time":2.828238568,"remaining_time":0.3495575759},
|
||||
{"learn":[0.002237031369],"iteration":445,"passed_time":2.836322912,"remaining_time":0.3434112942},
|
||||
{"learn":[0.002237036451],"iteration":446,"passed_time":2.843688677,"remaining_time":0.3371711406},
|
||||
{"learn":[0.002221210451],"iteration":447,"passed_time":2.852109334,"remaining_time":0.3310484048},
|
||||
{"learn":[0.00222120559],"iteration":448,"passed_time":2.859755976,"remaining_time":0.3248275162},
|
||||
{"learn":[0.002210740363],"iteration":449,"passed_time":2.86690254,"remaining_time":0.3185447266},
|
||||
{"learn":[0.002194734499],"iteration":450,"passed_time":2.874916475,"remaining_time":0.3123523443},
|
||||
{"learn":[0.002194707763],"iteration":451,"passed_time":2.882777744,"remaining_time":0.3061356896},
|
||||
{"learn":[0.00219468876],"iteration":452,"passed_time":2.89101055,"remaining_time":0.2999503219},
|
||||
{"learn":[0.002194694505],"iteration":453,"passed_time":2.89805487,"remaining_time":0.2936355155},
|
||||
{"learn":[0.002194666443],"iteration":454,"passed_time":2.904223893,"remaining_time":0.2872309345},
|
||||
{"learn":[0.002175334209],"iteration":455,"passed_time":2.911236419,"remaining_time":0.2809087772},
|
||||
{"learn":[0.002166846817],"iteration":456,"passed_time":2.917561015,"remaining_time":0.2745188701},
|
||||
{"learn":[0.002166824942],"iteration":457,"passed_time":2.924180988,"remaining_time":0.2681563352},
|
||||
{"learn":[0.002166792681],"iteration":458,"passed_time":2.930790216,"remaining_time":0.2617917187},
|
||||
{"learn":[0.002166766608],"iteration":459,"passed_time":2.93760902,"remaining_time":0.2554442626},
|
||||
{"learn":[0.002166747605],"iteration":460,"passed_time":2.944706747,"remaining_time":0.2491183582},
|
||||
{"learn":[0.002166747605],"iteration":461,"passed_time":2.950923251,"remaining_time":0.242716631},
|
||||
{"learn":[0.002166750919],"iteration":462,"passed_time":2.956866259,"remaining_time":0.2362938479},
|
||||
{"learn":[0.002148725016],"iteration":463,"passed_time":2.962967723,"remaining_time":0.2298854268},
|
||||
{"learn":[0.002148725016],"iteration":464,"passed_time":2.968874337,"remaining_time":0.2234636598},
|
||||
{"learn":[0.002148730541],"iteration":465,"passed_time":2.97497123,"remaining_time":0.2170579867},
|
||||
{"learn":[0.002148697617],"iteration":466,"passed_time":2.980942401,"remaining_time":0.2106447521},
|
||||
{"learn":[0.00214868215],"iteration":467,"passed_time":2.987012355,"remaining_time":0.204240161},
|
||||
{"learn":[0.002148675079],"iteration":468,"passed_time":2.993157683,"remaining_time":0.1978419791},
|
||||
{"learn":[0.002136127942],"iteration":469,"passed_time":2.999589502,"remaining_time":0.1914631597},
|
||||
{"learn":[0.002118017411],"iteration":470,"passed_time":3.005833951,"remaining_time":0.1850725787},
|
||||
{"learn":[0.002104941731],"iteration":471,"passed_time":3.011493516,"remaining_time":0.1786479204},
|
||||
{"learn":[0.002104892457],"iteration":472,"passed_time":3.01761408,"remaining_time":0.1722528122},
|
||||
{"learn":[0.002104878094],"iteration":473,"passed_time":3.023260121,"remaining_time":0.1658328336},
|
||||
{"learn":[0.002104869698],"iteration":474,"passed_time":3.029108836,"remaining_time":0.1594267808},
|
||||
{"learn":[0.002104843403],"iteration":475,"passed_time":3.035039568,"remaining_time":0.1530272051},
|
||||
{"learn":[0.002104843403],"iteration":476,"passed_time":3.040550175,"remaining_time":0.1466093376},
|
||||
{"learn":[0.002104850253],"iteration":477,"passed_time":3.046220475,"remaining_time":0.140202616},
|
||||
{"learn":[0.002104835669],"iteration":478,"passed_time":3.051984365,"remaining_time":0.1338030724},
|
||||
{"learn":[0.002104778219],"iteration":479,"passed_time":3.057633249,"remaining_time":0.1274013854},
|
||||
{"learn":[0.002104778219],"iteration":480,"passed_time":3.063148739,"remaining_time":0.1209975593},
|
||||
{"learn":[0.002104778219],"iteration":481,"passed_time":3.068948469,"remaining_time":0.1146080341},
|
||||
{"learn":[0.002104781754],"iteration":482,"passed_time":3.074542952,"remaining_time":0.1082137271},
|
||||
{"learn":[0.002104777114],"iteration":483,"passed_time":3.079888869,"remaining_time":0.1018145081},
|
||||
{"learn":[0.002104762752],"iteration":484,"passed_time":3.085570183,"remaining_time":0.09543000567},
|
||||
{"learn":[0.002104742644],"iteration":485,"passed_time":3.091431953,"remaining_time":0.08905359536},
|
||||
{"learn":[0.002104728723],"iteration":486,"passed_time":3.097400027,"remaining_time":0.08268213626},
|
||||
{"learn":[0.002104708174],"iteration":487,"passed_time":3.103132347,"remaining_time":0.07630653312},
|
||||
{"learn":[0.002104675472],"iteration":488,"passed_time":3.108998189,"remaining_time":0.06993656458},
|
||||
{"learn":[0.002090229825],"iteration":489,"passed_time":3.115154074,"remaining_time":0.06357457295},
|
||||
{"learn":[0.002086724696],"iteration":490,"passed_time":3.120988353,"remaining_time":0.05720752582},
|
||||
{"learn":[0.00208671409],"iteration":491,"passed_time":3.126476368,"remaining_time":0.05083701411},
|
||||
{"learn":[0.002086702158],"iteration":492,"passed_time":3.132050135,"remaining_time":0.04447130009},
|
||||
{"learn":[0.002086705251],"iteration":493,"passed_time":3.137718015,"remaining_time":0.0381099354},
|
||||
{"learn":[0.002067750643],"iteration":494,"passed_time":3.143372722,"remaining_time":0.03175123961},
|
||||
{"learn":[0.002051501941],"iteration":495,"passed_time":3.148244201,"remaining_time":0.02538906614},
|
||||
{"learn":[0.002051467029],"iteration":496,"passed_time":3.153009265,"remaining_time":0.01903224908},
|
||||
{"learn":[0.002051466587],"iteration":497,"passed_time":3.157840748,"remaining_time":0.01268209136},
|
||||
{"learn":[0.002051456865],"iteration":498,"passed_time":3.162634441,"remaining_time":0.006337944773},
|
||||
{"learn":[0.002051436537],"iteration":499,"passed_time":3.167324097,"remaining_time":0}
|
||||
]}
|
||||
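A minimal sketch for reading this log back (assuming the default catboost_info/catboost_training.json output path, and that the "iterations" key holds one record per boosting round, as in the entries above):

import json

with open("catboost_info/catboost_training.json", encoding="utf-8") as f:
    log = json.load(f)

losses = [rec["learn"][0] for rec in log["iterations"]]
print(f"final train Logloss after {len(losses)} iterations: {losses[-1]:.6f}")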
Binary file not shown.
501
classification_model/Parallel/catboost_info/learn_error.tsv
Normal file
@ -0,0 +1,501 @@
iter	Logloss
0	0.6147067235
[... rows 1-498 elided: the train Logloss falls from 0.5598 at iteration 1 to 0.00205 at iteration 498 ...]
499	0.002051436537
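A minimal sketch for plotting this curve (assuming the file sits at catboost_info/learn_error.tsv relative to the working directory; the header row gives tab-separated "iter" and "Logloss" columns):

import pandas as pd
import matplotlib.pyplot as plt

curve = pd.read_csv("catboost_info/learn_error.tsv", sep="\t")
plt.plot(curve["iter"], curve["Logloss"])
plt.xlabel("iteration")
plt.ylabel("train Logloss")
plt.yscale("log")  # the loss spans ~0.6 down to ~0.002, so a log axis reads better
plt.show()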
501
classification_model/Parallel/catboost_info/time_left.tsv
Normal file
@ -0,0 +1,501 @@
iter	Passed	Remaining
0	93	46835
[... rows 1-498 elided: cumulative time passed and estimated time remaining per iteration, in milliseconds ...]
499	3167	0
649
classification_model/Parallel/predict_plastic.py
Normal file
@ -0,0 +1,649 @@
"""
-*- coding: utf-8 -*-
@Time :2022/04/12 17:10
@Author : Pengyou FU
@blogs : https://blog.csdn.net/Echo_Code?spm=1000.2115.3001.5343
@github : https://github.com/FuSiry/OpenSA
@WeChat : Fu_siry
@License: Apache-2.0 license

"""
from imblearn.over_sampling import SMOTE
import pandas as pd
from classification_model.DataLoad.DataLoad import SetSplit, LoadNirtest
from classification_model.Preprocessing.Preprocessing import Preprocessing
from classification_model.WaveSelect.WaveSelcet import SpctrumFeatureSelcet
from classification_model.Classification.ClassicCls import (
    LogisticRegressionModel, SVM as SVM_Classic, PLS_DA, RF,
    XGBoost, LightGBM, CatBoost, AdaBoost, KNN
)
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
import sklearn.svm as svm
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.metrics import (accuracy_score, confusion_matrix, classification_report,
                             precision_score, recall_score, f1_score)
import numpy as np
import joblib
import os
from sklearn.svm import SVC
import matplotlib.pyplot as plt


def cross_validate_model(model, X, y, cv=5):
    """
    Report k-fold cross-validation accuracy for a model.

    :param model: estimator to evaluate
    :param X: feature matrix
    :param y: label vector
    :param cv: number of folds
    :return: array of per-fold accuracy scores
    """
    scores = cross_val_score(model, X, y, cv=cv)
    print(f"Cross-validation accuracy: {scores.mean():.4f} ± {scores.std():.4f}")
    return scores


# Confusion matrix and classification report
def evaluate_model(y_true, y_pred, dataset_name="Test", title="Confusion Matrix", cmap='Blues'):
    """
    Performance evaluation: prints a classification report and computes the
    confusion matrix (labels start from 1).

    Parameters:
    y_true -- ground-truth labels
    y_pred -- predicted labels
    dataset_name -- dataset name (e.g. "Train" or "Test")
    title -- figure title
    cmap -- heatmap colormap
    """
    print(f"{dataset_name} Classification Report:")
    print(classification_report(y_true, y_pred))

    # Compute the confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Heatmap plotting (optional; uncomment to display, requires "import seaborn as sns")
    # plt.figure(figsize=(8, 6))
    # ax = sns.heatmap(cm, annot=True, fmt='g', cmap=cmap, cbar=True,
    #                  linewidths=0.5, linecolor='black', square=True,
    #                  annot_kws={"size": 12})
    # ax.set_title(f"{dataset_name} {title}", fontsize=16)
    # ax.set_xlabel('Predicted Label', fontsize=14)
    # ax.set_ylabel('True Label', fontsize=14)
    # plt.tight_layout()
    # plt.show()

    # Return a dictionary of metrics, including the confusion matrix
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, average='weighted'),
        "recall": recall_score(y_true, y_pred, average='weighted'),
        "f1_score": f1_score(y_true, y_pred, average='weighted'),
        "confusion_matrix": cm
    }
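
# Illustrative sketch only (hypothetical labels, not part of the pipeline):
#
#   demo = evaluate_model([1, 2, 2, 1], [1, 2, 1, 1], dataset_name="Demo")
#   print(demo["accuracy"])          # 0.75
#   print(demo["confusion_matrix"])  # [[2 0] [1 1]]
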
# Qualitative spectral analysis
def SpectralQualitativeAnalysis(data, label, ProcessMethods, ProcessMethods2, FslecetedMethods, SetSplitMethods, use_smote=False):
    # Preprocessing (two chained passes)
    ProcesedData = Preprocessing(ProcessMethods, data)
    ProcesedData2 = Preprocessing(ProcessMethods2, ProcesedData)

    # Feature selection
    FeatrueData, labels, selected_columns = SpctrumFeatureSelcet(FslecetedMethods, ProcesedData2, label)

    # Train/test split
    X_train, X_test, y_train, y_test = SetSplit(SetSplitMethods, FeatrueData, labels, test_size=0.3, randomseed=42)

    # Use SMOTE to oversample the minority classes
    if use_smote:
        smote = SMOTE(random_state=42)
        X_train, y_train = smote.fit_resample(X_train, y_train)
        print("SMOTE applied: Training set size after resampling:", len(y_train))

    # Model training and evaluation run on the returned splits
    return X_train, X_test, y_train, y_test
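
# Usage sketch (hypothetical CSV path; method names follow the conventions of
# Preprocessing / SpctrumFeatureSelcet / SetSplit used above):
#
#   df = pd.read_csv("spectra.csv")
#   X_train, X_test, y_train, y_test = SpectralQualitativeAnalysis(
#       df.iloc[:, 1:], df.iloc[:, 0], 'SS', 'None', 'None', 'random', use_smote=True)
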
def Procesed(data, ProcessMethods1, ProcessMethods2, model_path):
    """
    Preprocess data, supporting two chained preprocessing methods.

    :param data: input data
    :param ProcessMethods1: first preprocessing method (e.g. 'SS', 'MMS', 'None')
    :param ProcessMethods2: second preprocessing method (e.g. 'SG', 'D1', 'None')
    :param model_path: model path (used to locate scaler_params.pkl)
    :return: the preprocessed data
    """
    import os
    from classification_model.Preprocessing.Preprocessing import Preprocessing

    # First preprocessing step
    if ProcessMethods1 == 'SS':
        # When the first method is SS, load the scaler saved at training time
        model_dir = os.path.dirname(model_path)
        scaler_path = os.path.join(model_dir, 'scaler_params.pkl')
        if not os.path.exists(scaler_path):
            raise FileNotFoundError(f"Scaler file not found at {scaler_path}. Please ensure the model was trained with SS preprocessing.")
        loaded_scaler = joblib.load(scaler_path)
        transformed_data = loaded_scaler.transform(data)
        # Convert to a DataFrame for the subsequent steps
        transformed_data_layout = pd.DataFrame(transformed_data)
    elif ProcessMethods1 == 'None' or ProcessMethods1 is None:
        # If the first method is None, use the raw data directly
        transformed_data_layout = pd.DataFrame(data) if not isinstance(data, pd.DataFrame) else data
    else:
        # Other methods go straight through the Preprocessing function
        transformed_data_layout = Preprocessing(ProcessMethods1, data)
        if isinstance(transformed_data_layout, np.ndarray):
            transformed_data_layout = pd.DataFrame(transformed_data_layout)

    # Second preprocessing step
    if ProcessMethods2 == 'None' or ProcessMethods2 is None:
        ProcesedData2 = transformed_data_layout
    else:
        ProcesedData2 = Preprocessing(ProcessMethods2, transformed_data_layout)
        if isinstance(ProcesedData2, np.ndarray):
            ProcesedData2 = pd.DataFrame(ProcesedData2)

    return ProcesedData2
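
# Usage sketch (hypothetical model path): re-applying the training-time
# preprocessing at prediction time; with ProcessMethods1='SS' the scaler saved
# next to the model (scaler_params.pkl) is loaded and reused.
#
#   processed = Procesed(new_spectra_df, 'SS', 'SG', r"./modelsave/svm.m")
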
def SVM_with_kernels_visualization(X_train, X_test, y_train, y_test, param_grid=None):
    """
    Tune SVM hyperparameters for several kernels, validate the best model on
    the test set, and draw a 3-D grid plot per kernel.

    :param X_train: training features
    :param X_test: test features
    :param y_train: training labels
    :param y_test: test labels
    :param param_grid: hyperparameter grid; if None, a default range is used.
    """
    if param_grid is None:
        # Default parameter grid
        param_grid = {
            'C': [0.1, 1, 10, 100],
            'gamma': [1, 0.1, 0.01, 0.001],
            'kernel': ['linear', 'rbf', 'poly']
        }

    # Initialise the SVM model
    svc = SVC()

    # Grid search
    grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Report the best parameters and the corresponding score
    print("Best Parameters:", grid_search.best_params_)
    print("Best Cross-Validation Score:", grid_search.best_score_)

    # Validate the best model on the test set
    best_model = grid_search.best_estimator_
    y_test_pred = best_model.predict(X_test)

    print("\nTest Set Evaluation:")
    print(classification_report(y_test, y_test_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))
    print(f"Test Accuracy: {accuracy_score(y_test, y_test_pred):.4f}")

    # Collect the search results
    results = grid_search.cv_results_

    # Draw one grid plot per kernel type
    kernels = np.unique(param_grid['kernel'])
    for kernel in kernels:
        kernel_indices = [i for i, params in enumerate(results['params']) if params['kernel'] == kernel]
        C_values = [results['params'][i]['C'] for i in kernel_indices]
        gamma_values = [results['params'][i]['gamma'] for i in kernel_indices if 'gamma' in results['params'][i]]
        scores = results['mean_test_score'][kernel_indices]

        # The linear kernel has no gamma parameter to plot
        if kernel == 'linear':
            plot_linear_kernel(C_values, scores, kernel)
        else:
            plot_3D_grid(C_values, gamma_values, scores, kernel)

    return best_model


def plot_3D_grid(C_values, gamma_values, scores, kernel):
    """
    Draw a 3-D hyperparameter surface (for the RBF and polynomial kernels)
    with a colour gradient.

    :param C_values: list of C values
    :param gamma_values: list of gamma values
    :param scores: corresponding cross-validation scores
    :param kernel: kernel name
    """
    # Reshape the data onto a grid
    C_unique = np.unique(C_values)
    gamma_unique = np.unique(gamma_values)
    C_grid, gamma_grid = np.meshgrid(C_unique, gamma_unique)

    # Build the Z axis (cross-validation scores)
    Z = np.zeros_like(C_grid)
    for i, c in enumerate(C_unique):
        for j, gamma in enumerate(gamma_unique):
            indices = [k for k, val in enumerate(C_values) if val == c and gamma_values[k] == gamma]
            if indices:
                Z[j, i] = scores[indices[0]]

    # Put C and gamma on a log scale
    log_C_grid = np.log10(C_grid)
    log_gamma_grid = np.log10(gamma_grid)

    # Draw the 3-D surface with a colour gradient
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111, projection='3d')
    surface = ax.plot_surface(
        log_C_grid, log_gamma_grid, Z, cmap='viridis', edgecolor='k', alpha=0.8
    )

    # Add a colour bar
    cbar = fig.colorbar(surface, pad=0.1, shrink=0.5, aspect=10)
    cbar.set_label('Mean Accuracy', fontsize=12)

    # Axes and title
    ax.set_title(f'3D Hyperparameter Grid ({kernel} kernel)', fontsize=16)
    ax.set_xlabel('Log10(C)', fontsize=12)
    ax.set_ylabel('Log10(Gamma)', fontsize=12)
    ax.set_zlabel('Mean Accuracy', fontsize=12)

    # Show the figure
    plt.show()


def plot_linear_kernel(C_values, scores, kernel):
    """
    Plot the tuning curve for the linear kernel (C only).

    :param C_values: list of C values
    :param scores: corresponding cross-validation scores
    :param kernel: kernel name
    """
    # Put C on a log scale
    C_values = np.log10(C_values)

    # 2-D line plot
    plt.figure(figsize=(8, 6))
    plt.plot(C_values, scores, marker='o', label='Mean Accuracy')
    plt.xlabel('Log10(C)', fontsize=12)
    plt.ylabel('Mean Accuracy', fontsize=12)
    plt.title(f'Hyperparameter Tuning ({kernel} kernel)', fontsize=16)
    plt.grid(True)
    plt.legend()
    plt.show()


# Classify each superpixel and fill the result into the label array
def classify_and_fill(segments, superpixel_features, model, label_array):
    """
    :param segments: superpixel segmentation map
    :param superpixel_features: mapping from segment id to its mean feature vector
    :param model: trained classifier
    :param label_array: output array to fill with per-pixel class labels
    :return: the filled label array
    """
    for segment, feature in superpixel_features.items():
        # Feed the mean hyperspectral feature to the model to predict a class
        label = model.predict([feature])[0]
        # Fill the prediction into the matching positions of the label array
        label_array[segments == segment] = label
    return label_array
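
# Usage sketch with hypothetical inputs: `segments` is a 2-D superpixel label
# map (e.g. from skimage.segmentation.slic), `superpixel_features` maps each
# segment id to its mean spectrum, and `label_array` collects per-pixel classes.
#
#   label_array = np.zeros(segments.shape, dtype=int)
#   label_array = classify_and_fill(segments, superpixel_features, model, label_array)
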
def save_model(model, model_path, model_type='SVM'):
    """
    Save a model to the given path.
    :param model: trained model object
    :param model_path: destination path
    :param model_type: model type (used for logging)
    :return: the full save path
    """
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(model, model_path)
    print(f"{model_type} model saved to: {model_path}")
    return model_path


def load_model(model_path):
    """
    Load a model (works for every model type saved with joblib).
    :param model_path: model path
    :return: the loaded model
    """
    return joblib.load(model_path)


def predict_and_save(df, model_path, model_type='SVM', ProcessMethods1='SS', ProcessMethods2='SG'):
    """
    Predict with a saved model (works for every model type).
    :param df: dataframe holding reflectance and shape features
    :param model_path: path of the saved model
    :param model_type: model type (optional, for special handling)
    :param ProcessMethods1: first preprocessing method, default 'SS' (loads scaler_params.pkl when 'SS')
    :param ProcessMethods2: second preprocessing method, default 'SG'
    :return: the dataframe with a prediction column appended
    """
    model = load_model(model_path)

    # Locate the contour column, if present
    contour_col_idx = None
    if 'contour' in df.columns:
        contour_col_idx = df.columns.get_loc('contour')

    # Select all numeric columns (excluding the contour column)
    numeric_cols = []
    for i in range(1, df.shape[1]):  # skip the first column (class label or ID)
        if i != contour_col_idx:
            col_name = df.columns[i]
            # Keep only numeric columns
            if df[col_name].dtype in ['int64', 'float64']:
                numeric_cols.append(col_name)

    # Assemble the feature matrix
    x = df[numeric_cols]

    # Apply the two-step preprocessing
    Procesed_features = Procesed(x, ProcessMethods1, ProcessMethods2, model_path)

    # Make sure the features are a numpy array before predicting
    if isinstance(Procesed_features, pd.DataFrame):
        Procesed_features = Procesed_features.values

    # Predict
    predictions = model.predict(Procesed_features)
    df['Predictions'] = predictions

    return df


def SVM(X_train, X_test, y_train, y_test, kernel='linear', C=1, gamma=1e-3):
    clf = svm.SVC(C=C, kernel=kernel, gamma=gamma)

    # Cross-validation
    cross_validate_model(clf, X_train, y_train)

    # Fit the model
    clf.fit(X_train, y_train.ravel())

    # Training-set evaluation
    y_train_pred = clf.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    # Test-set evaluation
    y_test_pred = clf.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return clf


# ==================== Training functions for all models (each returns the model object) ====================

def train_LogisticRegression(X_train, X_test, y_train, y_test, penalty='l2', C=1.0, solver='lbfgs', max_iter=200):
    """Train a logistic-regression model and return it."""
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression(penalty=penalty, C=C, solver=solver, max_iter=max_iter, multi_class='multinomial', random_state=1)

    cross_validate_model(model, X_train, y_train)
    model.fit(X_train, y_train.ravel())

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model


def train_PLS_DA(X_train, X_test, y_train, y_test, n_components=40):
    """Train a PLS-DA model (PLS regression on one-hot labels) and return it."""
    from sklearn.cross_decomposition import PLSRegression
    y_train_encoded = pd.get_dummies(y_train)
    model = PLSRegression(n_components=n_components)

    model.fit(X_train, y_train_encoded)

    y_train_pred = model.predict(X_train)
    y_train_pred = np.argmax(y_train_pred, axis=1)
    train_metrics = evaluate_model(np.argmax(y_train_encoded.values, axis=1), y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    y_test_pred = np.argmax(y_test_pred, axis=1)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model


def train_RF(X_train, X_test, y_train, y_test, n_estimators=200, max_depth=15, n_jobs=-1):
    """Train a random-forest model and return it."""
    from sklearn.ensemble import RandomForestClassifier
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=1, n_jobs=n_jobs)

    # Note: cross_validate_model takes no n_jobs argument; parallelism is set on the estimator itself
    cross_validate_model(model, X_train, y_train)
    model.fit(X_train, y_train.ravel())

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model


def train_XGBoost(X_train, X_test, y_train, y_test, n_estimators=100, learning_rate=0.1, max_depth=3):
    """Train an XGBoost model and return it."""
    import xgboost as xgb
    model = xgb.XGBClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        random_state=1,
        gpu_id=0
    )

    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model


def train_LightGBM(X_train, X_test, y_train, y_test, n_estimators=100, learning_rate=0.1, max_depth=-1, num_leaves=31):
    """Train a LightGBM model and return it."""
    import lightgbm as lgb
    model = lgb.LGBMClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        num_leaves=num_leaves,
        random_state=1
    )

    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model


def train_CatBoost(X_train, X_test, y_train, y_test, iterations=500, learning_rate=0.1, depth=6):
    """Train a CatBoost model and return it."""
    import catboost as cb
    model = cb.CatBoostClassifier(
        iterations=iterations,
        learning_rate=learning_rate,
        depth=depth,
        random_seed=1,
        verbose=0
    )

    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model


def train_AdaBoost(X_train, X_test, y_train, y_test, n_estimators=50, learning_rate=1.0):
    """Train an AdaBoost model and return it."""
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.tree import DecisionTreeClassifier

    base_estimator = DecisionTreeClassifier(max_depth=1)
    model = AdaBoostClassifier(
        base_estimator=base_estimator,
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        random_state=1
    )

    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model


def train_KNN(X_train, X_test, y_train, y_test, n_neighbors=5, weights='uniform', algorithm='auto'):
    """Train a KNN model and return it."""
    from sklearn.neighbors import KNeighborsClassifier
    model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm)

    cross_validate_model(model, X_train, y_train)
    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model


# ==================== Unified train-and-save entry point ====================

def train_and_save_model(model_name, X_train, X_test, y_train, y_test, model_save_dir, **kwargs):
    """
    Train the named model and save it.
    :param model_name: one of ('SVM', 'LogisticRegression', 'PLS_DA', 'RF', 'XGBoost', 'LightGBM', 'CatBoost', 'AdaBoost', 'KNN')
    :param X_train: training features
    :param X_test: test features
    :param y_train: training labels
    :param y_test: test labels
    :param model_save_dir: directory to save the model into
    :param kwargs: model-specific hyperparameters
    :return: the trained model and its save path
    """
    model_trainers = {
        'SVM': SVM,
        'LogisticRegression': train_LogisticRegression,
        'PLS_DA': train_PLS_DA,
        'RF': train_RF,
        'XGBoost': train_XGBoost,
        'LightGBM': train_LightGBM,
        'CatBoost': train_CatBoost,
        'AdaBoost': train_AdaBoost,
        'KNN': train_KNN
    }

    if model_name not in model_trainers:
        raise ValueError(f"Unsupported model: {model_name}. Supported models: {list(model_trainers.keys())}")

    print(f"\n{'='*60}")
    print(f"Training {model_name} model...")
    print(f"{'='*60}")

    # Train the model
    trainer = model_trainers[model_name]
    model = trainer(X_train, X_test, y_train, y_test, **kwargs)

    # Save the model
    os.makedirs(model_save_dir, exist_ok=True)
    model_path = os.path.join(model_save_dir, f"{model_name.lower()}.m")
    save_model(model, model_path, model_type=model_name)

    return model, model_path


# ==================== Prediction helpers per model ====================

def predict_with_model(df, model_path, model_type='SVM', ProcessMethods1='SS', ProcessMethods2='SG'):
    """
    Predict with the specified model.
    :param df: dataframe holding reflectance and shape features
    :param model_path: model path
    :param model_type: model type
    :param ProcessMethods1: first preprocessing method, default 'SS' (loads scaler_params.pkl when 'SS')
    :param ProcessMethods2: second preprocessing method, default 'SG'
    :return: dataframe with the prediction results
    """
    return predict_and_save(df, model_path, model_type=model_type, ProcessMethods1=ProcessMethods1, ProcessMethods2=ProcessMethods2)


# Entry point for training
if __name__ == "__main__":
    # Read the CSV file with pandas
    file_path = r"E:\code\plastic\plastic20260224\plastic\plastic\output\20260224\all.csv"
    df = pd.read_csv(
        file_path,
        encoding='utf-8',  # try 'gbk' or 'gb18030' if decoding fails
        low_memory=False   # avoid dtype-inference problems
    )

    # Columns to drop (index ranges 1:5, 87:110 and 166:169, zero-based)
    cols_to_remove = df.columns[np.r_[1:5, 87:110, 166:169]]

    # Drop the selected columns
    df_filtered = df.drop(columns=cols_to_remove)

    # Feature matrix: everything except the first (label) column
    x = df_filtered.iloc[:, 1:]
    # x = df.iloc[:, 1:]
    # Labels: the first column
    y = df.iloc[:, 0]
    X_train, X_test, y_train, y_test = SpectralQualitativeAnalysis(x, y, 'SS', 'None', 'None', 'random', use_smote=True)

    # # Grid-search the SVM and visualise each kernel in 3-D
    # param_grid = {
    #     'C': np.logspace(-3, 3, 13),      # 13 values between 10^-3 and 10^3
    #     'gamma': np.logspace(-4, 1, 13),  # 13 values between 10^-4 and 10^1
    #     'kernel': ['rbf']                 # RBF kernel only
    # }
    # clf = SVM_with_kernels_visualization(X_train, X_test, y_train, y_test, param_grid)
    # joblib.dump(clf, "./classification_model/model_save/pre_salinas_MODEL.m")
    # clf1 = joblib.load("./classification_model/model_save/pre_salinas_MODEL.m")

    # Example 1: train and save an SVM model (legacy route, still supported)
    # clf = SVM(X_train, X_test, y_train, y_test)
    # save_model(clf, r"D:\WQ\plastic\classification_model\modelsave\svm.m", model_type='SVM')

    # Example 2: use the unified train-and-save function (recommended)
    save_dir = r"E:\code\plastic\plastic20260224\plastic\plastic\output\20260224\modelsave"

    # Train and save one or more models
    models_to_train = ['SVM']  # e.g. 'SVM', 'RF', 'XGBoost', 'LogisticRegression'
    for model_name in models_to_train:
        model, model_path = train_and_save_model(
            model_name=model_name,
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            model_save_dir=save_dir
        )
        print(f"{model_name} model saved at: {model_path}")

    # Example 3: load a model and predict
    # model_path = r"D:\WQ\plastic\classification_model\modelsave\svm.m"
    # loaded_model = load_model(model_path)
    # # Use the same preprocessing as at training time
    # # ProcessMethods1='SS' automatically loads scaler_params.pkl
    # # ProcessMethods2='SG' applies a Savitzky-Golay filter
    # predictions_df = predict_with_model(df, model_path, model_type='SVM', ProcessMethods1='SS', ProcessMethods2='SG')
    # print(f"Predictions completed. Results shape: {predictions_df.shape}")
831
classification_model/Parallel/test.py
Normal file
831
classification_model/Parallel/test.py
Normal file
@ -0,0 +1,831 @@

from imblearn.over_sampling import SMOTE
import pandas as pd
from classification_model.DataLoad.DataLoad import SetSplit, LoadNirtest
from classification_model.Preprocessing.Preprocessing import Preprocessing
from classification_model.WaveSelect.WaveSelcet import SpctrumFeatureSelcet
from classification_model.Classification.ClassicCls import (
    LogisticRegressionModel, SVM as SVM_Classic, PLS_DA, RF,
    XGBoost, LightGBM, CatBoost, AdaBoost, KNN
)
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
import sklearn.svm as svm
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score
import numpy as np
import joblib
import os
from sklearn.svm import SVC
import matplotlib.pyplot as plt

def cross_validate_model(model, X, y, cv=5):
    """
    Cross-validate a model and print the mean accuracy.

    :param model: the model to evaluate
    :param X: feature matrix
    :param y: labels
    :param cv: number of folds
    :return: array of per-fold scores
    """
    scores = cross_val_score(model, X, y, cv=cv)
    print(f"Cross-validation accuracy: {scores.mean():.4f} ± {scores.std():.4f}")
    return scores


# ==================== Spectral data augmentation module ====================

def augment_spectrum(spectrum, noise_level=0.01, offset_range=0.02, multiplier_range=(0.95, 1.05), slope_range=(-0.001, 0.001), random_state=None):
    """
    Augment a single spectrum by adding random noise, a baseline offset, a
    multiplicative factor, and a random slope change.

    :param spectrum: a single spectrum (1-D array)
    :param noise_level: noise level (std as a fraction of the spectrum's std), default 0.01 (1%)
    :param offset_range: offset range (absolute value), default 0.02
    :param multiplier_range: multiplicative factor range (min, max), default (0.95, 1.05)
    :param slope_range: slope change range (min, max), default (-0.001, 0.001)
    :param random_state: random seed, for reproducibility
    :return: the augmented spectrum
    """
    if random_state is not None:
        np.random.seed(random_state)

    spectrum = np.array(spectrum).flatten()
    n_features = len(spectrum)

    # 1. Add random (Gaussian) noise
    noise = np.random.normal(0, noise_level * np.std(spectrum), n_features)
    augmented = spectrum + noise

    # 2. Add an offset (baseline shift)
    offset = np.random.uniform(-offset_range, offset_range)
    augmented = augmented + offset

    # 3. Multiplicative change (a variant of multiplicative scatter effects)
    multiplier = np.random.uniform(multiplier_range[0], multiplier_range[1])
    augmented = augmented * multiplier

    # 4. Slope change (linear baseline drift)
    slope = np.random.uniform(slope_range[0], slope_range[1])
    x_indices = np.arange(n_features)
    augmented = augmented + slope * x_indices

    return augmented

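# A minimal usage sketch (illustrative, not part of the pipeline; the synthetic
# spectrum below is a hypothetical stand-in for real reflectance data):
# _wavelengths = np.linspace(0, 1, 200)
# _raw = 0.3 + 0.1 * np.sin(4 * np.pi * _wavelengths)        # synthetic spectrum
# _aug = augment_spectrum(_raw, noise_level=0.02, random_state=0)
# assert _aug.shape == _raw.shape                            # shape is preserved
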
def augment_dataset(X, y, augmentation_factor=1, noise_level=0.01, offset_range=0.02,
                    multiplier_range=(0.95, 1.05), slope_range=(-0.001, 0.001),
                    random_state=None, preserve_original=True):
    """
    Apply spectral data augmentation to a whole dataset.

    :param X: feature data (n_samples, n_features)
    :param y: label data (n_samples,)
    :param augmentation_factor: each sample yields augmentation_factor augmented samples, default 1
    :param noise_level: noise level, default 0.01
    :param offset_range: offset range, default 0.02
    :param multiplier_range: multiplicative factor range, default (0.95, 1.05)
    :param slope_range: slope change range, default (-0.001, 0.001)
    :param random_state: random seed, default None
    :param preserve_original: whether to keep the original samples, default True
    :return: the augmented feature and label data (X_augmented, y_augmented)
    """
    # Make sure X is a dense numpy array
    if hasattr(X, 'toarray'):  # handle sparse matrices
        X = X.toarray()
    else:
        X = np.array(X)

    # Make sure X is a 2-D array
    if X.ndim == 1:
        X = X.reshape(1, -1)

    y = np.array(y).flatten()

    if random_state is not None:
        np.random.seed(random_state)

    augmented_X_list = []
    augmented_y_list = []

    n_samples = X.shape[0]
    n_features = X.shape[1]

    # Process every sample
    for i in range(n_samples):
        # Get the current sample (as a 1-D array)
        current_sample = np.array(X[i]).flatten()

        # If the original data should be preserved, append the original sample first
        if preserve_original:
            # Make sure the original sample is a 2-D array (1, n_features)
            original_sample = current_sample.reshape(1, -1)
            augmented_X_list.append(original_sample)
            augmented_y_list.append(y[i])

        # Generate the augmented samples
        for j in range(augmentation_factor):
            # Use a different random seed for every augmented sample
            if random_state is not None:
                seed = random_state + i * augmentation_factor + j
            else:
                seed = None
            augmented_spectrum = augment_spectrum(
                current_sample,
                noise_level=noise_level,
                offset_range=offset_range,
                multiplier_range=multiplier_range,
                slope_range=slope_range,
                random_state=seed
            )
            # Make sure it is a 2-D array (1, n_features)
            augmented_X_list.append(augmented_spectrum.reshape(1, -1))
            augmented_y_list.append(y[i])

    # Merge all data
    if len(augmented_X_list) > 0:
        X_augmented = np.vstack(augmented_X_list)
        y_augmented = np.array(augmented_y_list)
    else:
        X_augmented = X
        y_augmented = y

    return X_augmented, y_augmented

def augment_dataset_with_params(X, y, augmentation_params=None, random_state=None, preserve_original=True):
    """
    Augment a whole dataset using a parameter dictionary (a more flexible interface).

    :param X: feature data (n_samples, n_features)
    :param y: label data (n_samples,)
    :param augmentation_params: augmentation parameter dictionary, containing:
        - 'augmentation_factor': augmentation factor, default 1
        - 'noise_level': noise level, default 0.01
        - 'offset_range': offset range, default 0.02
        - 'multiplier_range': multiplicative factor range, default (0.95, 1.05)
        - 'slope_range': slope change range, default (-0.001, 0.001)
    :param random_state: random seed, default None
    :param preserve_original: whether to keep the original samples, default True
    :return: the augmented feature and label data (X_augmented, y_augmented)
    """
    if augmentation_params is None:
        augmentation_params = {}

    return augment_dataset(
        X, y,
        augmentation_factor=augmentation_params.get('augmentation_factor', 1),
        noise_level=augmentation_params.get('noise_level', 0.01),
        offset_range=augmentation_params.get('offset_range', 0.02),
        multiplier_range=augmentation_params.get('multiplier_range', (0.95, 1.05)),
        slope_range=augmentation_params.get('slope_range', (-0.001, 0.001)),
        random_state=random_state,
        preserve_original=preserve_original
    )

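# A minimal usage sketch (illustrative; `X_demo`/`y_demo` are hypothetical arrays):
# _params = {'augmentation_factor': 2, 'noise_level': 0.02}
# X_demo = np.random.rand(10, 50)
# y_demo = np.repeat([0, 1], 5)
# X_aug, y_aug = augment_dataset_with_params(X_demo, y_demo, augmentation_params=_params, random_state=0)
# # with preserve_original=True each sample yields 1 original + 2 augmented rows:
# assert X_aug.shape == (30, 50)
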
# Confusion matrix and classification report
def evaluate_model(y_true, y_pred, dataset_name="Test", title="Confusion Matrix", cmap='Blues'):
    """
    Performance evaluation: prints a classification report and returns the metrics
    together with the confusion matrix.

    Parameters:
    y_true -- ground-truth labels
    y_pred -- predicted labels
    dataset_name -- dataset name (e.g. "Train" or "Test")
    title -- plot title
    cmap -- heatmap colormap
    """
    print(f"{dataset_name} Classification Report:")
    print(classification_report(y_true, y_pred))

    # Compute the confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Draw the heatmap (optionally uncomment to show the figure; requires `import seaborn as sns`)
    # plt.figure(figsize=(8, 6))
    # ax = sns.heatmap(cm, annot=True, fmt='g', cmap=cmap, cbar=True,
    #                  linewidths=0.5, linecolor='black', square=True,
    #                  annot_kws={"size": 12})
    # ax.set_title(f"{dataset_name} {title}", fontsize=16)
    # ax.set_xlabel('Predicted Label', fontsize=14)
    # ax.set_ylabel('True Label', fontsize=14)
    # plt.tight_layout()
    # plt.show()

    # Return a dictionary of performance metrics, including the confusion matrix
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, average='weighted'),
        "recall": recall_score(y_true, y_pred, average='weighted'),
        "f1_score": f1_score(y_true, y_pred, average='weighted'),
        "confusion_matrix": cm
    }

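# A minimal usage sketch (illustrative; the label vectors are hypothetical):
# _y_true = np.array([0, 0, 1, 1, 2, 2])
# _y_pred = np.array([0, 1, 1, 1, 2, 0])
# _metrics = evaluate_model(_y_true, _y_pred, dataset_name="Demo")
# print(_metrics["accuracy"], _metrics["f1_score"])
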
# Spectral qualitative analysis
def SpectralQualitativeAnalysis(data, label, ProcessMethods, ProcessMethods2, FslecetedMethods, SetSplitMethods,
                                use_smote=False, use_augmentation=False, augmentation_params=None, random_state=42):
    """
    Spectral qualitative analysis, with optional data augmentation.

    :param data: input data
    :param label: label data
    :param ProcessMethods: first preprocessing method
    :param ProcessMethods2: second preprocessing method
    :param FslecetedMethods: feature-selection method
    :param SetSplitMethods: dataset-splitting method
    :param use_smote: whether to apply SMOTE, default False
    :param use_augmentation: whether to apply spectral data augmentation, default False
    :param augmentation_params: augmentation parameter dictionary, default None (use the defaults)
    :param random_state: random seed, default 42
    :return: X_train, X_test, y_train, y_test
    """
    # Preprocessing
    ProcesedData = Preprocessing(ProcessMethods, data)
    ProcesedData2 = Preprocessing(ProcessMethods2, ProcesedData)

    # Feature selection
    FeatrueData, labels, selected_columns = SpctrumFeatureSelcet(FslecetedMethods, ProcesedData2, label)

    # Dataset split
    X_train, X_test, y_train, y_test = SetSplit(SetSplitMethods, FeatrueData, labels, test_size=0.3, randomseed=random_state)

    # Spectral data augmentation (applied before SMOTE)
    if use_augmentation:
        print(f"Original training set size: {len(y_train)}")
        X_train, y_train = augment_dataset_with_params(
            X_train, y_train,
            augmentation_params=augmentation_params,
            random_state=random_state,
            preserve_original=True
        )
        print(f"Training set size after augmentation: {len(y_train)}")

    # Use SMOTE to oversample the minority classes
    if use_smote:
        smote = SMOTE(random_state=random_state)
        X_train, y_train = smote.fit_resample(X_train, y_train)
        print("SMOTE applied: Training set size after resampling:", len(y_train))

    # Model training and evaluation are handled by the caller
    return X_train, X_test, y_train, y_test

def Procesed(data, ProcessMethods1, ProcessMethods2, model_path):
    """
    Preprocess the data, supporting two preprocessing methods.

    :param data: input data
    :param ProcessMethods1: first preprocessing method (e.g. 'SS', 'MMS', 'None')
    :param ProcessMethods2: second preprocessing method (e.g. 'SG', 'D1', 'None')
    :param model_path: model path (used to locate scaler_params.pkl)
    :return: the preprocessed data
    """
    import os
    # import the Preprocessing function (not the package) so the calls below work
    from classification_model.Preprocessing.Preprocessing import Preprocessing

    # First preprocessing step
    if ProcessMethods1 == 'SS':
        # When the first preprocessing method is SS, load the saved scaler
        model_dir = os.path.dirname(model_path)
        scaler_path = os.path.join(model_dir, 'scaler_params.pkl')
        if not os.path.exists(scaler_path):
            raise FileNotFoundError(f"Scaler file not found at {scaler_path}. Please ensure the model was trained with SS preprocessing.")
        loaded_scaler = joblib.load(scaler_path)
        transformed_data = loaded_scaler.transform(data)
        # Convert to a DataFrame for the subsequent steps
        transformed_data_layout = pd.DataFrame(transformed_data)
    elif ProcessMethods1 == 'None' or ProcessMethods1 is None:
        # When the first preprocessing method is None, use the raw data directly
        transformed_data_layout = pd.DataFrame(data) if not isinstance(data, pd.DataFrame) else data
    else:
        # For any other method, call the Preprocessing function directly
        transformed_data_layout = Preprocessing(ProcessMethods1, data)
        if isinstance(transformed_data_layout, np.ndarray):
            transformed_data_layout = pd.DataFrame(transformed_data_layout)

    # Second preprocessing step
    if ProcessMethods2 == 'None' or ProcessMethods2 is None:
        ProcesedData2 = transformed_data_layout
    else:
        ProcesedData2 = Preprocessing(ProcessMethods2, transformed_data_layout)
        if isinstance(ProcesedData2, np.ndarray):
            ProcesedData2 = pd.DataFrame(ProcesedData2)

    return ProcesedData2

def SVM_with_kernels_visualization(X_train, X_test, y_train, y_test, param_grid=None):
    """
    Tune SVM hyperparameters per kernel, validate the best model on the test set,
    and draw a 3-D grid plot for each kernel.

    :param X_train: training features
    :param X_test: test features
    :param y_train: training labels
    :param y_test: test labels
    :param param_grid: hyperparameter grid, None by default (use the default ranges)
    """
    if param_grid is None:
        # Default parameter grid
        param_grid = {
            'C': [0.1, 1, 10, 100],
            'gamma': [1, 0.1, 0.01, 0.001],
            'kernel': ['linear', 'rbf', 'poly']
        }

    # Initialize the SVM model
    svc = SVC()

    # Grid search
    grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Print the best parameters and the corresponding score
    print("Best Parameters:", grid_search.best_params_)
    print("Best Cross-Validation Score:", grid_search.best_score_)

    # Validate the best model on the test set
    best_model = grid_search.best_estimator_
    y_test_pred = best_model.predict(X_test)

    print("\nTest Set Evaluation:")
    print(classification_report(y_test, y_test_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))
    print(f"Test Accuracy: {accuracy_score(y_test, y_test_pred):.4f}")

    # Fetch the search results
    results = grid_search.cv_results_

    # Draw a 3-D grid plot per kernel type
    kernels = np.unique(param_grid['kernel'])
    for kernel in kernels:
        kernel_indices = [i for i, params in enumerate(results['params']) if params['kernel'] == kernel]
        C_values = [results['params'][i]['C'] for i in kernel_indices]
        gamma_values = [results['params'][i]['gamma'] for i in kernel_indices if 'gamma' in results['params'][i]]
        scores = results['mean_test_score'][kernel_indices]

        # The linear kernel has no gamma parameter to plot
        if kernel == 'linear':
            plot_linear_kernel(C_values, scores, kernel)
        else:
            plot_3D_grid(C_values, gamma_values, scores, kernel)

    return best_model

def plot_3D_grid(C_values, gamma_values, scores, kernel):
    """
    Draw a 3-D hyperparameter grid plot (for the RBF and polynomial kernels), with a color gradient.

    :param C_values: list of C values
    :param gamma_values: list of gamma values
    :param scores: the corresponding cross-validation scores
    :param kernel: kernel name
    """
    # Arrange the data on a grid
    C_unique = np.unique(C_values)
    gamma_unique = np.unique(gamma_values)
    C_grid, gamma_grid = np.meshgrid(C_unique, gamma_unique)

    # Build the Z axis (the cross-validation scores)
    Z = np.zeros_like(C_grid)
    for i, c in enumerate(C_unique):
        for j, gamma in enumerate(gamma_unique):
            indices = [k for k, val in enumerate(C_values) if val == c and gamma_values[k] == gamma]
            if indices:
                Z[j, i] = scores[indices[0]]

    # Convert C and gamma to a logarithmic scale
    log_C_grid = np.log10(C_grid)
    log_gamma_grid = np.log10(gamma_grid)

    # Draw the 3-D surface with a color gradient
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111, projection='3d')
    surface = ax.plot_surface(
        log_C_grid, log_gamma_grid, Z, cmap='viridis', edgecolor='k', alpha=0.8
    )

    # Add a colorbar
    cbar = fig.colorbar(surface, pad=0.1, shrink=0.5, aspect=10)
    cbar.set_label('Mean Accuracy', fontsize=12)

    # Set the axes and title
    ax.set_title(f'3D Hyperparameter Grid ({kernel} kernel)', fontsize=16)
    ax.set_xlabel('Log10(C)', fontsize=12)
    ax.set_ylabel('Log10(Gamma)', fontsize=12)
    ax.set_zlabel('Mean Accuracy', fontsize=12)

    # Show the figure
    plt.show()

def plot_linear_kernel(C_values, scores, kernel):
    """
    Draw the hyperparameter plot for the linear kernel (C only).

    :param C_values: list of C values
    :param scores: the corresponding cross-validation scores
    :param kernel: kernel name
    """
    # Convert C to a logarithmic scale
    C_values = np.log10(C_values)

    # Create a 2-D line plot
    plt.figure(figsize=(8, 6))
    plt.plot(C_values, scores, marker='o', label='Mean Accuracy')
    plt.xlabel('Log10(C)', fontsize=12)
    plt.ylabel('Mean Accuracy', fontsize=12)
    plt.title(f'Hyperparameter Tuning ({kernel} kernel)', fontsize=16)
    plt.grid(True)
    plt.legend()
    plt.show()

# Classify superpixels and fill the results into the label array
def classify_and_fill(segments, superpixel_features, model, label_array):
    """
    :param segments: superpixel segmentation map (one segment id per pixel)
    :param superpixel_features: dict mapping each segment id to its mean spectral feature vector
    :param model: the model
    :param label_array: array to fill with the predicted class per pixel
    :return: the filled label array
    """
    for segment, feature in superpixel_features.items():
        # Feed the mean hyperspectral feature to the model and predict the class
        label = model.predict([feature])[0]
        # Fill the corresponding positions of the label array
        label_array[segments == segment] = label
    return label_array

def save_model(model, model_path, model_type='SVM'):
    """
    Save a model to the given path.
    :param model: the trained model object
    :param model_path: path under which to save the model
    :param model_type: model type (used for logging)
    :return: the full save path
    """
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(model, model_path)
    print(f"{model_type} model saved to: {model_path}")
    return model_path

def load_model(model_path):
    """
    Load a model (works for all model types).
    :param model_path: model path
    :return: the loaded model
    """
    return joblib.load(model_path)

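# A minimal save/load round trip (illustrative; `clf` and the path are hypothetical):
# _path = save_model(clf, r".\modelsave\svm.m", model_type='SVM')
# _clf = load_model(_path)
# assert type(_clf) is type(clf)
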
def predict_and_save(df, model_path, model_type='SVM', ProcessMethods1='SS', ProcessMethods2='SG'):
    """
    Predict with a model (works for all model types).
    :param df: dataframe containing reflectance and shape features
    :param model_path: path of the model
    :param model_type: model type (optional, for special handling)
    :param ProcessMethods1: first preprocessing method, 'SS' by default (when 'SS', scaler_params.pkl is loaded)
    :param ProcessMethods2: second preprocessing method, 'SG' by default
    :return: dataframe with an added prediction column
    """
    model = load_model(model_path)

    # Find the index of the contour column
    contour_col_idx = None
    if 'contour' in df.columns:
        contour_col_idx = df.columns.get_loc('contour')

    # Select all numeric columns (excluding the contour column)
    numeric_cols = []
    for i in range(1, df.shape[1]):  # skip the first column (likely the class or an ID)
        if i != contour_col_idx:
            col_name = df.columns[i]
            # Keep only numeric columns
            if df[col_name].dtype in ['int64', 'float64']:
                numeric_cols.append(col_name)

    # Load the data
    x = df[numeric_cols]

    # Preprocess (both preprocessing methods are supported)
    Procesed_features = Procesed(x, ProcessMethods1, ProcessMethods2, model_path)

    # Make sure Procesed_features is a numpy array for model prediction
    if isinstance(Procesed_features, pd.DataFrame):
        Procesed_features = Procesed_features.values

    # Predict
    predictions = model.predict(Procesed_features)
    df['Predictions'] = predictions

    return df

def SVM(X_train, X_test, y_train, y_test, kernel='linear', C=1, gamma=1e-3):
    clf = svm.SVC(C=C, kernel=kernel, gamma=gamma)

    # Cross-validation
    cross_validate_model(clf, X_train, y_train)

    # Fit the model
    clf.fit(X_train, y_train.ravel())

    # Evaluate on the training set
    y_train_pred = clf.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    # Evaluate on the test set
    y_test_pred = clf.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return clf

# ==================== Training functions for all models (each returns the model object) ====================

def train_LogisticRegression(X_train, X_test, y_train, y_test, penalty='l2', C=1.0, solver='lbfgs', max_iter=200):
    """Train a logistic-regression model and return the model object."""
    from sklearn.linear_model import LogisticRegression
    # Note: multi_class='multinomial' is deprecated in recent scikit-learn releases,
    # where multinomial behaviour is already the default for the lbfgs solver.
    model = LogisticRegression(penalty=penalty, C=C, solver=solver, max_iter=max_iter, multi_class='multinomial', random_state=1)

    cross_validate_model(model, X_train, y_train)
    model.fit(X_train, y_train.ravel())

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model

def train_PLS_DA(X_train, X_test, y_train, y_test, n_components=40):
    """Train a PLS-DA model and return the model object."""
    from sklearn.cross_decomposition import PLSRegression
    # One-hot encode the labels; argmax over the prediction columns maps back to the
    # sorted class values, so this assumes the labels are 0..n_classes-1.
    y_train_encoded = pd.get_dummies(y_train)
    model = PLSRegression(n_components=n_components)

    model.fit(X_train, y_train_encoded)

    y_train_pred = model.predict(X_train)
    y_train_pred = np.argmax(y_train_pred, axis=1)
    train_metrics = evaluate_model(np.argmax(y_train_encoded.values, axis=1), y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    y_test_pred = np.argmax(y_test_pred, axis=1)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model

def train_RF(X_train, X_test, y_train, y_test, n_estimators=200, max_depth=15, n_jobs=-1):
    """Train a random-forest model and return the model object."""
    from sklearn.ensemble import RandomForestClassifier
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=1, n_jobs=n_jobs)

    # cross_validate_model takes no n_jobs argument; the estimator itself parallelizes
    cross_validate_model(model, X_train, y_train)
    model.fit(X_train, y_train.ravel())

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model

def train_XGBoost(X_train, X_test, y_train, y_test, n_estimators=100, learning_rate=0.1, max_depth=3):
    """Train an XGBoost model and return the model object."""
    import xgboost as xgb
    model = xgb.XGBClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        random_state=1,
        gpu_id=0  # requires a GPU build of XGBoost; deprecated in favor of device='cuda' in XGBoost >= 2.0
    )

    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model

def train_LightGBM(X_train, X_test, y_train, y_test, n_estimators=100, learning_rate=0.1, max_depth=-1, num_leaves=31):
    """Train a LightGBM model and return the model object."""
    import lightgbm as lgb
    model = lgb.LGBMClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        num_leaves=num_leaves,
        random_state=1
    )

    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model

def train_CatBoost(X_train, X_test, y_train, y_test, iterations=500, learning_rate=0.1, depth=6):
    """Train a CatBoost model and return the model object."""
    import catboost as cb
    model = cb.CatBoostClassifier(
        iterations=iterations,
        learning_rate=learning_rate,
        depth=depth,
        random_seed=1,
        verbose=0
    )

    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model

def train_AdaBoost(X_train, X_test, y_train, y_test, n_estimators=50, learning_rate=1.0):
    """Train an AdaBoost model and return the model object."""
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.tree import DecisionTreeClassifier

    base_estimator = DecisionTreeClassifier(max_depth=1)
    model = AdaBoostClassifier(
        base_estimator=base_estimator,  # renamed to `estimator` in scikit-learn >= 1.2
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        random_state=1
    )

    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model

def train_KNN(X_train, X_test, y_train, y_test, n_neighbors=5, weights='uniform', algorithm='auto'):
    """Train a KNN model and return the model object."""
    from sklearn.neighbors import KNeighborsClassifier
    model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm)

    cross_validate_model(model, X_train, y_train)
    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    train_metrics = evaluate_model(y_train, y_train_pred, dataset_name="Train")

    y_test_pred = model.predict(X_test)
    test_metrics = evaluate_model(y_test, y_test_pred, dataset_name="Test")

    return model

# ==================== Unified model training and saving ====================

def train_and_save_model(model_name, X_train, X_test, y_train, y_test, model_save_dir, **kwargs):
    """
    Train the specified model and save it.
    :param model_name: model name ('SVM', 'LogisticRegression', 'PLS_DA', 'RF', 'XGBoost', 'LightGBM', 'CatBoost', 'AdaBoost', 'KNN')
    :param X_train: training features
    :param X_test: test features
    :param y_train: training labels
    :param y_test: test labels
    :param model_save_dir: directory in which to save the model
    :param kwargs: model-specific hyperparameters
    :return: the trained model and the save path
    """
    model_trainers = {
        'SVM': SVM,
        'LogisticRegression': train_LogisticRegression,
        'PLS_DA': train_PLS_DA,
        'RF': train_RF,
        'XGBoost': train_XGBoost,
        'LightGBM': train_LightGBM,
        'CatBoost': train_CatBoost,
        'AdaBoost': train_AdaBoost,
        'KNN': train_KNN
    }

    if model_name not in model_trainers:
        raise ValueError(f"Unsupported model: {model_name}. Supported models: {list(model_trainers.keys())}")

    print(f"\n{'='*60}")
    print(f"Training {model_name} model...")
    print(f"{'='*60}")

    # Train the model
    trainer = model_trainers[model_name]
    model = trainer(X_train, X_test, y_train, y_test, **kwargs)

    # Save the model
    os.makedirs(model_save_dir, exist_ok=True)
    model_path = os.path.join(model_save_dir, f"{model_name.lower()}.m")
    save_model(model, model_path, model_type=model_name)

    return model, model_path

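# A minimal usage sketch (illustrative; X_train etc. come from the pipeline above,
# and the extra keyword arguments are forwarded to the matching train_* function):
# _model, _path = train_and_save_model('RF', X_train, X_test, y_train, y_test,
#                                      model_save_dir='./modelsave',
#                                      n_estimators=300, max_depth=10)
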
# ==================== Prediction helpers for the different models ====================

def predict_with_model(df, model_path, model_type='SVM', ProcessMethods1='SS', ProcessMethods2='SG'):
    """
    Run prediction with the specified model.
    :param df: dataframe containing reflectance and shape features
    :param model_path: path to the model
    :param model_type: model type
    :param ProcessMethods1: first preprocessing method, 'SS' by default (when 'SS', scaler_params.pkl is loaded)
    :param ProcessMethods2: second preprocessing method, 'SG' by default
    :return: dataframe with the prediction results
    """
    return predict_and_save(df, model_path, model_type=model_type, ProcessMethods1=ProcessMethods1, ProcessMethods2=ProcessMethods2)

# Main entry point, used for training
if __name__ == "__main__":
    # Load the dataset
    data = pd.read_csv(r"E:\plastic\plastic\output\20251113\数据增强\all.csv")
    df = pd.DataFrame(data)
    # x = df.iloc[:, 1:]
    # x = df.iloc[:, np.r_[1:94, 119:]].values
    cols_to_remove = df.columns[87:110]

    # Drop these columns
    df_filtered = df.drop(columns=cols_to_remove)

    # Extract the data (keeping column names)
    x = df_filtered.iloc[:, 1:].values
    y = df.iloc[:, 0]

    # Example: without data augmentation (original behaviour)
    # X_train, X_test, y_train, y_test = SpectralQualitativeAnalysis(x, y, 'None', 'None', 'None', 'random', use_smote=True)

    # Example: with spectral data augmentation (recommended)
    # Define the augmentation parameters
    augmentation_params = {
        'augmentation_factor': 2,         # generate 2 augmented samples per sample
        'noise_level': 0.01,              # noise level: 1%
        'offset_range': 0.02,             # offset range: ±0.02
        'multiplier_range': (0.9, 1.1),   # multiplicative factor range: 0.9-1.1
        'slope_range': (0, 0.1)           # slope change range: 0 to 0.1
    }

    X_train, X_test, y_train, y_test = SpectralQualitativeAnalysis(
        x, y, 'D1', 'None', 'None', 'random',
        use_smote=True,
        use_augmentation=False,  # set to True to enable data augmentation
        augmentation_params=augmentation_params,
        random_state=42
    )

    # # Grid-search an SVM model and draw a 3-D visualization per kernel
    # param_grid = {
    #     'C': np.logspace(-3, 3, 13),      # 13 values between 10^(-3) and 10^3
    #     'gamma': np.logspace(-4, 1, 13),  # 13 values between 10^(-4) and 10^1
    #     'kernel': ['rbf']                 # RBF kernel only
    # }
    # clf = SVM_with_kernels_visualization(X_train, X_test, y_train, y_test, param_grid)
    # joblib.dump(clf, "./classification_model/model_save/pre_salinas_MODEL.m")
    # clf1 = joblib.load("./classification_model/model_save/pre_salinas_MODEL.m")

    # Example 1: train and save an SVM model (legacy approach, still supported)
    # clf = SVM(X_train, X_test, y_train, y_test)
    # save_model(clf, r"D:\WQ\plastic\classification_model\modelsave\svm.m", model_type='SVM')

    # Example 2: use the unified train-and-save helper (recommended)
    save_dir = r"E:\plastic\plastic\output\20251113\一阶导数"

    # Train and save several models
    models_to_train = ['CatBoost']  # 'SVM', 'RF', 'XGBoost', 'LogisticRegression'
    for model_name in models_to_train:
        model, model_path = train_and_save_model(
            model_name=model_name,
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            model_save_dir=save_dir
        )
        print(f"{model_name} model saved at: {model_path}")

    # Example 3: load a model and run prediction
    # model_path = r"D:\WQ\plastic\classification_model\modelsave\svm.m"
    # loaded_model = load_model(model_path)
    # # Use the same preprocessing methods at prediction time as during training
    # # ProcessMethods1='SS' automatically loads scaler_params.pkl
    # # ProcessMethods2='SG' applies Savitzky-Golay filtering
    # predictions_df = predict_with_model(df, model_path, model_type='SVM', ProcessMethods1='SS', ProcessMethods2='SG')
    # print(f"Predictions completed. Results shape: {predictions_df.shape}")

157
classification_model/Preprocessing/Preprocessing.py
Normal file
157
classification_model/Preprocessing/Preprocessing.py
Normal file
@ -0,0 +1,157 @@

import numpy as np
from scipy import signal
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import pandas as pd
import pywt
from copy import deepcopy
import joblib  # used to save and load models


# Min-max normalization
def MMS(input_spectrum):
    output_spectrum = MinMaxScaler().fit_transform(input_spectrum)
    return output_spectrum

# Standardization
def SS(input_spectrum, save_path=None):
    # Initialize a StandardScaler and fit it to the data
    scaler = StandardScaler()
    output_spectrum = scaler.fit_transform(input_spectrum)

    # If a save path is given, persist the scaler object
    if save_path:
        joblib.dump(scaler, save_path)
        print(f"Scaler parameters saved to {save_path}")

    return output_spectrum

# Mean centering
def CT(input_spectrum):
    output_spectrum = deepcopy(input_spectrum)
    for i in range(output_spectrum.shape[0]):
        MEAN = np.mean(output_spectrum[i])
        output_spectrum[i] = output_spectrum[i] - MEAN
    return output_spectrum

# Standard normal variate (SNV) transform
def SNV(input_spectrum):
    if not isinstance(input_spectrum, pd.DataFrame):
        raise ValueError("Input spectrum must be a Pandas DataFrame")
    data_average = input_spectrum.mean(axis=1)
    data_std = input_spectrum.std(axis=1)
    data_std = data_std.replace(0, 1)
    output_spectrum = (input_spectrum.sub(data_average, axis=0)).div(data_std, axis=0)
    return output_spectrum

# Moving-average smoothing
def MA(input_spectrum, WSZ=11):
    output_spectrum = deepcopy(input_spectrum)
    for i in range(output_spectrum.shape[0]):
        out0 = np.convolve(output_spectrum[i], np.ones(WSZ, dtype=int), 'valid') / WSZ
        r = np.arange(1, WSZ - 1, 2)
        start = np.cumsum(output_spectrum[i, :WSZ - 1])[::2] / r
        stop = (np.cumsum(output_spectrum[i, :-WSZ:-1])[::2] / r)[::-1]
        output_spectrum[i] = np.concatenate((start, out0, stop))
    return output_spectrum

# Savitzky-Golay smoothing filter
def SG(input_spectrum, w=15, p=2):
    output_spectrum = signal.savgol_filter(input_spectrum, w, p)
    return output_spectrum

# First derivative (note: the output has one column fewer than the input)
def D1(input_spectrum):
    n, p = input_spectrum.shape
    output_spectrum = np.ones((n, p - 1))
    for i in range(n):
        output_spectrum[i] = np.diff(input_spectrum[i])
    return output_spectrum

# Second derivative
def D2(input_spectrum):
    temp2 = (pd.DataFrame(input_spectrum)).diff(axis=1)
    temp3 = np.delete(temp2.values, 0, axis=1)
    temp4 = (pd.DataFrame(temp3)).diff(axis=1)
    output_spectrum = np.delete(temp4.values, 0, axis=1)
    return output_spectrum

# Detrending
def DT(input_spectrum):
    length = input_spectrum.shape[1]
    x = np.asarray(range(length), dtype=np.float32)
    output_spectrum = np.array(input_spectrum)
    l = LinearRegression()
    for i in range(output_spectrum.shape[0]):
        l.fit(x.reshape(-1, 1), output_spectrum[i].reshape(-1, 1))
        k = l.coef_.item()       # slope as a scalar
        b = l.intercept_.item()  # intercept as a scalar
        for j in range(output_spectrum.shape[1]):
            output_spectrum[i][j] = output_spectrum[i][j] - (j * k + b)
    return output_spectrum

# Multiplicative scatter correction
def MSC(input_spectrum):
    n, p = input_spectrum.shape
    output_spectrum = np.ones((n, p))
    mean = np.mean(input_spectrum, axis=0)
    for i in range(n):
        y = input_spectrum[i, :]
        l = LinearRegression()
        l.fit(mean.reshape(-1, 1), y.reshape(-1, 1))
        k = l.coef_
        b = l.intercept_
        output_spectrum[i, :] = (y - b) / k
    return output_spectrum

# Wavelet transform denoising
def wave(input_spectrum):
    def wave_(input_spectrum_row):
        w = pywt.Wavelet('db8')
        maxlev = pywt.dwt_max_level(len(input_spectrum_row), w.dec_len)
        coeffs = pywt.wavedec(input_spectrum_row, 'db8', level=maxlev)
        threshold = 0.04
        for i in range(1, len(coeffs)):
            coeffs[i] = pywt.threshold(coeffs[i], threshold * max(coeffs[i]))
        output_spectrum_row = pywt.waverec(coeffs, 'db8')
        return output_spectrum_row

    output_spectrum = None
    for i in range(input_spectrum.shape[0]):
        if i == 0:
            output_spectrum = wave_(input_spectrum[i])
        else:
            output_spectrum = np.vstack((output_spectrum, wave_(input_spectrum[i])))

    return output_spectrum

# Generic preprocessing dispatcher
def Preprocessing(method, input_spectrum):
    if isinstance(input_spectrum, np.ndarray):
        input_spectrum = pd.DataFrame(input_spectrum)
    if method == "None":
        output_spectrum = input_spectrum
    elif method == 'MMS':
        output_spectrum = MMS(input_spectrum.values)
    elif method == 'SS':
        # Note: the scaler save path is hard-coded for this project's layout
        output_spectrum = SS(input_spectrum.values, r'E:\code\plastic\plastic20260224\plastic\plastic\output\20260224\modelsave\scaler_params.pkl')
    elif method == 'CT':
        output_spectrum = CT(input_spectrum.values)
    elif method == 'SNV':
        output_spectrum = SNV(input_spectrum)
    elif method == 'MA':
        output_spectrum = MA(input_spectrum.values)
    elif method == 'SG':
        output_spectrum = SG(input_spectrum.values)
    elif method == 'MSC':
        output_spectrum = MSC(input_spectrum.values)
    elif method == 'D1':
        output_spectrum = D1(input_spectrum.values)
    elif method == 'D2':
        output_spectrum = D2(input_spectrum.values)
    elif method == 'DT':
        output_spectrum = DT(input_spectrum.values)
    elif method == 'WVAE':
        output_spectrum = wave(input_spectrum.values)
    else:
        print("No such method of preprocessing!")
        output_spectrum = input_spectrum.values
    return output_spectrum
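# A minimal usage sketch of the dispatcher (illustrative; `df_spectra` is a
# hypothetical DataFrame of raw spectra):
# _snv = Preprocessing('SNV', df_spectra)                # returns a DataFrame
# _smoothed = Preprocessing('SG', _snv)                  # returns an ndarray
# _deriv = Preprocessing('D1', pd.DataFrame(_smoothed))  # one fewer column per row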
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
176
classification_model/WaveSelect/Cars.py
Normal file
176
classification_model/WaveSelect/Cars.py
Normal file
@ -0,0 +1,176 @@

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import copy
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold


def PC_Cross_Validation(X, y, pc, cv):
    '''
    X : spectral matrix (DataFrame), n x m
    y : concentration vector (Series) (chemical values)
    pc: maximum number of principal components
    cv: number of cross-validation folds
    return :
        RMSECV: RMSECV for each number of components
        rindex: the optimal number of components (0-based index)
    '''
    kf = KFold(n_splits=cv)
    RMSECV = []
    for i in range(pc):
        RMSE = []
        for train_index, test_index in kf.split(X):
            x_train, x_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
            pls = PLSRegression(n_components=i + 1)
            pls.fit(x_train, y_train)
            y_predict = pls.predict(x_test)
            RMSE.append(np.sqrt(mean_squared_error(y_test, y_predict)))
        RMSE_mean = np.mean(RMSE)
        RMSECV.append(RMSE_mean)
    rindex = np.argmin(RMSECV)
    return RMSECV, rindex


def Cross_Validation(X, y, pc, cv):
    '''
    X : spectral matrix (DataFrame), n x m
    y : concentration vector (Series) (chemical values)
    pc: number of principal components
    cv: number of cross-validation folds
    return :
        the mean RMSECV
    '''
    kf = KFold(n_splits=cv)
    RMSE = []
    for train_index, test_index in kf.split(X):
        x_train, x_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        pls = PLSRegression(n_components=pc)
        pls.fit(x_train, y_train)
        y_predict = pls.predict(x_test)
        RMSE.append(np.sqrt(mean_squared_error(y_test, y_predict)))
    RMSE_mean = np.mean(RMSE)
    return RMSE_mean


def CARS_Cloud(X, y, N=50, f=20, cv=10, save_fig=False, save_path=None):
    '''
    X : spectral matrix (DataFrame or ndarray)
    y : concentration vector (Series or ndarray)
    N : number of Monte Carlo iterations
    f : maximum number of features
    cv : number of cross-validation folds
    save_fig : whether to save the figure
    save_path : path under which to save the figure
    return :
        OptWave : the selected wavelengths
    '''
    p = 0.8
    m, n = X.shape
    u = np.power((n / 2), (1 / (N - 1)))
    k = (1 / (N - 1)) * np.log(n / 2)
    cal_num = np.round(m * p)
    b2 = np.arange(n)
    x = np.asarray(X)  # convert a DataFrame to a numpy array
    y = np.asarray(y)  # convert a Series to a numpy array
    D = np.vstack((np.array(b2).reshape(1, -1), x))
    WaveData = []
    WaveNum = []
    RMSECV = []
    r = []

    for i in range(1, N + 1):
        r.append(u * np.exp(-1 * k * i))
        wave_num = int(np.round(r[i - 1] * n))
        WaveNum = np.hstack((WaveNum, wave_num))
        cal_index = np.random.choice(np.arange(m), size=int(cal_num), replace=False)
        wave_index = b2[:wave_num].reshape(1, -1)[0]

        # Use np.ix_ for simultaneous row/column indexing
        xcal = x[np.ix_(cal_index, wave_index)]  # select the matching rows and columns
        ycal = y[cal_index]  # select the matching y values

        # Flatten ycal to a 1-D array
        ycal = ycal.ravel()

        x = x[:, wave_index]  # update x
        D = D[:, wave_index]  # update D
        d = D[0, :].reshape(1, -1)
        wnum = n - wave_num
        if wnum > 0:
            d = np.hstack((d, np.full((1, wnum), -1)))
        if len(WaveData) == 0:
            WaveData = d
        else:
            WaveData = np.vstack((WaveData, d.reshape(1, -1)))

        if wave_num < f:
            f = wave_num

        pls = PLSRegression(n_components=f)
        pls.fit(xcal, ycal)
        beta = pls.coef_

        # Handle the coef_ layout of both old and new scikit-learn versions
        if beta.shape[0] == 1:  # newer scikit-learn: shape (1, n_features)
            b = np.abs(beta[0])  # take the first (only) row
        else:  # older scikit-learn: shape (n_features, 1)
            b = np.abs(beta[:, 0])  # take the first (only) column

        # Re-rank the remaining wavelengths by absolute PLS coefficient
        b2 = np.argsort(-b, axis=0)
        rmsecv, rindex = PC_Cross_Validation(pd.DataFrame(xcal), pd.Series(ycal), f, cv)
        RMSECV.append(Cross_Validation(pd.DataFrame(xcal), pd.Series(ycal), rindex + 1, cv))

    WAVE = []
    for i in range(WaveData.shape[0]):
        wd = WaveData[i, :]
        WD = np.ones((len(wd)))
        for j in range(len(wd)):
            ind = np.where(wd == j)
            if len(ind[0]) == 0:
                WD[j] = 0
            else:
                WD[j] = wd[ind[0][0]]
        if len(WAVE) == 0:
            WAVE = copy.deepcopy(WD)
        else:
            WAVE = np.vstack((WAVE, WD.reshape(1, -1)))

    MinIndex = np.argmin(RMSECV)
    Optimal = WAVE[MinIndex, :]
    boindex = np.where(Optimal != 0)
    OptWave = boindex[0]

    plt.figure(figsize=(12, 10))
    # Use the Times New Roman font
    plt.rcParams['font.sans-serif'] = ['Times New Roman']
    plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
    fonts = 20

    plt.subplot(211)
    plt.xlabel('Monte Carlo Iterations', fontsize=fonts)
    plt.ylabel('Number of Selected Wavelengths', fontsize=fonts)
    plt.title('Optimal Iteration: ' + str(MinIndex), fontsize=fonts)
    plt.plot(np.arange(N), WaveNum)

    plt.subplot(212)
    plt.xlabel('Monte Carlo Iterations', fontsize=fonts)
    plt.ylabel('RMSECV', fontsize=fonts)
    plt.plot(np.arange(N), RMSECV)

    # Save the figure
    if save_fig:
        plt.savefig(save_path)  # save the figure to a file
        print(f"The figure has been saved as {save_path}")

    # plt.show()

    return OptWave
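# A minimal usage sketch (illustrative; `spectra`/`conc` are hypothetical arrays of
# shape (n_samples, n_wavelengths) and (n_samples,)):
# spectra = np.random.rand(60, 200)
# conc = np.random.rand(60)
# selected = CARS_Cloud(spectra, conc, N=20, f=10, cv=5)
# print("selected wavelength indices:", selected)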
59
classification_model/WaveSelect/GA.py
Normal file
59
classification_model/WaveSelect/GA.py
Normal file
@ -0,0 +1,59 @@

from deap import base, creator, tools, algorithms
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier


def GA(X, y, n_generations=20, population_size=50, crossover_prob=0.7, mutation_prob=0.2):
    """
    Feature selection with a genetic algorithm; returns the selected feature indices.

    Parameters:
    X (ndarray): feature matrix
    y (ndarray): labels
    n_generations (int): number of generations
    population_size (int): population size
    crossover_prob (float): crossover probability
    mutation_prob (float): mutation probability

    Returns:
    list: indices of the selected features
    """
    # Create the fitness and individual types
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMax)

    toolbox = base.Toolbox()
    toolbox.register("attr_bool", lambda: np.random.randint(0, 2))
    toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=X.shape[1])
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    # Define the fitness function
    def evaluate(individual):
        selected_features = [index for index, val in enumerate(individual) if val == 1]
        if not selected_features:
            return 0,  # fitness is 0 when no feature is selected
        X_selected = X[:, selected_features]
        clf = RandomForestClassifier(random_state=42)
        score = cross_val_score(clf, X_selected, y, cv=5).mean()  # 5-fold cross-validation
        return score,

    toolbox.register("evaluate", evaluate)
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
    toolbox.register("select", tools.selTournament, tournsize=3)

    # Initialize the population
    population = toolbox.population(n=population_size)

    # Run the genetic algorithm
    result_population, _ = algorithms.eaSimple(population, toolbox, cxpb=crossover_prob,
                                               mutpb=mutation_prob, ngen=n_generations,
                                               verbose=False)

    # Get the best individual
    best_individual = tools.selBest(result_population, k=1)[0]
    selected_features = [index for index, val in enumerate(best_individual) if val == 1]

    return selected_features
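# A minimal usage sketch (illustrative, using the make_classification import above):
# X_demo, y_demo = make_classification(n_samples=100, n_features=20, n_informative=5, random_state=0)
# idx = GA(X_demo, y_demo, n_generations=5, population_size=20)
# print("selected feature indices:", idx)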
41
classification_model/WaveSelect/Lar.py
Normal file
41
classification_model/WaveSelect/Lar.py
Normal file
@ -0,0 +1,41 @@

"""
-*- coding: utf-8 -*-
@Time :2022/04/12 17:10
@Author : Pengyou FU
@blogs : https://blog.csdn.net/Echo_Code?spm=1000.2115.3001.5343
@github : https://github.com/FuSiry/OpenSA
@WeChat : Fu_siry
@License:Apache-2.0 license
"""

from sklearn import linear_model
import numpy as np

def Lar(X, y, nums=40):
    """
    Select important feature wavelengths with LARS (Least Angle Regression).

    Parameters:
    X : np.ndarray, predictor matrix (input data)
    y : np.ndarray, labels (target values)
    nums : int, number of feature points to select, 40 by default

    Returns:
    np.ndarray, indices of the selected feature wavelengths
    """
    # Initialize the LARS model
    Lars = linear_model.Lars()

    # Fit the model
    Lars.fit(X, y)

    # The absolute regression coefficients indicate feature importance
    corflist = np.abs(Lars.coef_)

    # Sort by importance and keep the nums most important features
    SpectrumList = np.argsort(corflist)[-nums:][::-1]

    # Sort the selected indices to keep a consistent order
    SpectrumList = np.sort(SpectrumList)

    return SpectrumList
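# A minimal usage sketch (illustrative; `spectra`/`target` are hypothetical arrays):
# spectra = np.random.rand(80, 150)
# target = np.random.rand(80)
# bands = Lar(spectra, target, nums=30)   # 30 sorted wavelength indices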
49
classification_model/WaveSelect/MRMR.py
Normal file
49
classification_model/WaveSelect/MRMR.py
Normal file
@ -0,0 +1,49 @@

import pymrmr
import pandas as pd


class MRMRFeatureSelection:
    def __init__(self, X, y):
        """
        Initialize the mRMR feature-selection module.

        :param X: input feature matrix (DataFrame), one feature per column.
        :param y: target variable (Series), aligned with the feature matrix X.
        """
        self.X = X
        self.y = y
        self.selected_features = None

    def select_features(self, k=18, method='MIQ'):
        """
        Run mRMR feature selection.

        :param k: number of features to select.
        :param method: mRMR variant to use ('MIQ' or 'MID').
        :return: list of selected features
        """
        # Concatenate the target variable and the features
        df = pd.concat([self.y, self.X], axis=1)

        # Run mRMR feature selection with pymrmr
        self.selected_features = pymrmr.mRMR(df, method, k)

        return self.selected_features

    def get_selected_features(self):
        """
        Get the features that have been selected.

        :return: the selected features
        """
        return self.selected_features

    def get_selected_feature_names(self):
        """
        Get the column names of the selected features.

        :return: list of selected feature column names
        """
        if self.selected_features is None:
            return None
        return self.selected_features
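# A minimal usage sketch (illustrative; pymrmr expects discretized features with the
# target as the first column, so `X_disc`/`y_ser` are hypothetical preprocessed data):
# selector = MRMRFeatureSelection(X_disc, y_ser)
# top_features = selector.select_features(k=10, method='MIQ')
# print(top_features)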
24
classification_model/WaveSelect/Pca.py
Normal file
24
classification_model/WaveSelect/Pca.py
Normal file
@ -0,0 +1,24 @@

"""
-*- coding: utf-8 -*-
@Time :2022/04/12 17:10
@Author : Pengyou FU
@blogs : https://blog.csdn.net/Echo_Code?spm=1000.2115.3001.5343
@github : https://github.com/FuSiry/OpenSA
@WeChat : Fu_siry
@License:Apache-2.0 license

"""

from sklearn.decomposition import PCA

def Pca(X, nums=20):
    """
    :param X: raw spectrum data, shape (n_samples, n_features)
    :param nums: number of principal components retained
    :return: X_reduction: spectral data after dimensionality reduction
    """
    pca = PCA(n_components=nums)  # number of components to keep
    pca.fit(X)
    X_reduction = pca.transform(X)

    return X_reduction
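# A minimal usage sketch (illustrative; `spectra` is a hypothetical array and
# `import numpy as np` is assumed):
# spectra = np.random.rand(50, 120)
# reduced = Pca(spectra, nums=10)
# assert reduced.shape == (50, 10)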
Some files were not shown because too many files have changed in this diff