Files
micro_plastic/onlyspectral_background.py
2026-02-25 09:42:51 +08:00

163 lines
5.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import pandas as pd
from pathlib import Path
import numpy as np
def process_single_bil_background(full_bil_path):
"""
处理单个BIL文件提取滤纸背景的平均光谱不需要mask
参数:
full_bil_path: BIL文件路径
返回:
mean_spectrum: 归一化的平均光谱数组
"""
try:
from plantcv import plantcv as pcv
import cv2
except ImportError:
raise ImportError("需要导入plantcv和cv2库from plantcv import plantcv as pcv; import cv2")
# 设置 PlantCV 的全局参数
pcv.params.debug = "None"
pcv.params.dpi = 100
pcv.params.text_size = 1
pcv.params.text_thickness = 1
# 读取光谱图像ENVI 格式)
spectral_array = pcv.readimage(filename=full_bil_path, mode='envi')
bath_100 = spectral_array.array_data[:, :, 100]
# 使用阈值提取滤纸区域(背景区域,阈值>5500
bath_over = pcv.threshold.binary(gray_img=bath_100, threshold=5500, object_type='light')
# 对滤纸掩膜进行形态学操作
bath_erode = pcv.erode(gray_img=bath_over, ksize=3, i=4)
dilate_img = pcv.dilate(gray_img=bath_erode, ksize=3, i=4)
# 填充大的孔洞
dilate_img = pcv.fill(bin_img=dilate_img, size=10000)
# 使用cv2.connectedComponents找到连通区域
num1, labeled_spectral_mask = cv2.connectedComponents(dilate_img.astype(np.uint8))
print(f" 检测到 {num1 - 1} 个滤纸区域")
# 找到最大面积的对象
max_area = 0
max_label = 0
for label in range(1, num1):
obj_mask = labeled_spectral_mask == label
area = np.sum(obj_mask)
if area > max_area:
max_area = area
max_label = label
# 提取最大面积对象的光谱
if max_label > 0:
max_obj_mask = labeled_spectral_mask == max_label
spectral_values = spectral_array.array_data[max_obj_mask, :]
mean_spectrum = np.mean(spectral_values, axis=0) / 10000 # 归一化
return mean_spectrum
else:
print(f" 警告: 未找到有效的滤纸区域")
n_bands = spectral_array.array_data.shape[-1]
return np.zeros(n_bands)
def batch_process_bil_background(bil_folder, output_csv_path):
"""
批量处理文件夹中的BIL文件提取滤纸背景光谱并保存到CSV
参数:
bil_folder: 包含.bil文件的文件夹路径
output_csv_path: 输出CSV文件路径宽格式每行一个文件每列一个波段
返回:
df: 包含所有文件光谱的DataFrame
"""
bil_folder = Path(bil_folder)
if not bil_folder.exists():
raise ValueError(f"文件夹不存在: {bil_folder}")
# 查找所有.bil文件
bil_files = []
for ext in ['.bil', '.BIL']:
bil_files.extend(list(bil_folder.glob(f'*{ext}')))
if len(bil_files) == 0:
print(f"警告: 在 {bil_folder} 中未找到任何.bil文件")
return None
print(f"找到 {len(bil_files)} 个BIL文件")
# 确保输出目录存在
output_path = Path(output_csv_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
# 存储所有结果
all_spectra = []
filenames = []
failed_files = []
# 处理每个文件
for i, bil_path in enumerate(bil_files, 1):
print(f"\n[{i}/{len(bil_files)}] 处理文件: {bil_path.name}")
print(f"{'=' * 60}")
try:
spectrum = process_single_bil_background(str(bil_path))
if spectrum is not None and len(spectrum) > 0 and np.sum(spectrum) > 0:
all_spectra.append(spectrum)
filenames.append(bil_path.stem)
print(f" ✓ 成功提取光谱 ({len(spectrum)} 个波段)")
else:
failed_files.append(bil_path.name)
print(f" ✗ 未能提取有效光谱")
except Exception as e:
failed_files.append(bil_path.name)
print(f" ✗ 处理失败: {str(e)}")
import traceback
traceback.print_exc()
continue
# 合并结果
if len(all_spectra) == 0:
print("\n没有有效的结果可以保存")
return None
# 创建宽格式DataFrame
n_bands = len(all_spectra[0])
column_names = [f'band_{i}' for i in range(n_bands)]
df = pd.DataFrame(all_spectra, columns=column_names, index=filenames)
df.index.name = 'filename'
df = df.reset_index()
# 保存到CSV
df.to_csv(output_csv_path, index=False)
# 打印统计信息
print(f"\n{'=' * 60}")
print(f"批量处理完成!")
print(f" 总共文件数: {len(bil_files)}")
print(f" 成功处理: {len(all_spectra)}")
print(f" 失败文件: {len(failed_files)}")
if failed_files:
print(f" 失败列表: {', '.join(failed_files)}")
print(f" 光谱波段数: {n_bands}")
print(f" 输出文件: {output_csv_path}")
print(f"{'=' * 60}")
return df
bil_folder = r"D:\Data\Traindata-09\whitebord"
output_csv = r"D:\Data\Traindata-09\whitebord\filter_paper_spectra.csv"
batch_process_bil_background(bil_folder, output_csv)