Files
micro_plastic/outputs2dataframe.py
2026-02-25 09:42:51 +08:00

77 lines
3.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
# Scalar shape descriptors copied from each sample's observations when present.
_SHAPE_FEATURES = (
    'area', 'convex_hull_area', 'solidity', 'perimeter',
    'width', 'height', 'circularity', 'shape_factor', 'aspect_ratio',
)

# A contour is kept only when it has MORE than this many points.
_MIN_CONTOUR_POINTS = 3


def _spectral_columns(sample_data: dict, reflectance_scale: float) -> dict:
    """Build the ``wavelength_<label>`` columns for one sample.

    Returns an empty dict when the sample has no ``"wavelength_means"`` entry,
    when that entry lacks ``"label"`` or ``"value"``, or when the two lists
    differ in length.
    """
    if "wavelength_means" not in sample_data:
        return {}
    spectral_data = sample_data["wavelength_means"]
    if "label" not in spectral_data or "value" not in spectral_data:
        return {}

    wavelengths = spectral_data["label"]
    values = spectral_data["value"]
    if len(wavelengths) != len(values):
        return {}

    columns = {}
    for wavelength, value in zip(wavelengths, values):
        try:
            # The label is only used to name the column (two decimals); it is
            # the measured value that gets divided by the scale factor.
            # float() lets numeric labels that arrive as strings ("450.5")
            # through instead of failing the ":.2f" format and being dropped.
            columns[f"wavelength_{float(wavelength):.2f}"] = value / reflectance_scale
        except (ValueError, TypeError):
            # Best effort: skip bands whose label or value is not numeric.
            continue
    return columns


def _contour_points(feature_data: dict):
    """Return the contour as a list of ``(x, y)`` tuples, or ``None`` to skip it.

    A contour is skipped when its ``"value"`` is missing or empty, or when it
    has three points or fewer (a warning is printed in the latter case).
    """
    if "value" not in feature_data or not feature_data["value"]:
        return None
    points = feature_data["value"]
    if len(points) <= _MIN_CONTOUR_POINTS:
        print("警告轮廓数据点数少于或等于3个跳过该数据")
        return None
    # Only the first two entries of every point are kept.
    return [(point[0], point[1]) for point in points]


def process_plantcv_outputs(observations: dict, *, reflectance_scale: float = 10000) -> pd.DataFrame:
    """Flatten in-memory PlantCV observations into one row per sample.

    :param observations: mapping of sample id -> that sample's observation
        dict (presumably ``pcv.outputs.observations`` -- confirm with caller).
        Each observation that is read is expected to be a dict with a
        ``"value"`` key; ``"wavelength_means"`` additionally needs ``"label"``.
    :param reflectance_scale: divisor applied to every ``wavelength_means``
        value. Defaults to 10000, the factor that used to be hard-coded.
    :return: DataFrame with a ``"Sample ID"`` column, one
        ``wavelength_<label>`` column per spectral band (numeric, with
        unparseable entries coerced to NaN), the scalar shape features that
        were present, and a ``"contour"`` column holding lists of ``(x, y)``
        tuples. Features missing for a sample are NaN in its row.
    :raises ValueError: if ``reflectance_scale`` is zero.
    """
    if not reflectance_scale:
        raise ValueError("reflectance_scale must be non-zero")

    all_samples = []
    for sample_id, sample_data in observations.items():
        sample_record = {"Sample ID": sample_id}

        # Spectral columns come first so the column order stays the same.
        sample_record.update(_spectral_columns(sample_data, reflectance_scale))

        for feature in _SHAPE_FEATURES:
            if feature in sample_data:
                feature_data = sample_data[feature]
                if "value" in feature_data:
                    sample_record[feature] = feature_data["value"]

        if 'contour' in sample_data:
            contour = _contour_points(sample_data['contour'])
            if contour is not None:
                sample_record['contour'] = contour

        all_samples.append(sample_record)

    df = pd.DataFrame(all_samples)

    # Force every spectral column to a numeric dtype; bad cells become NaN.
    wavelength_cols = [col for col in df.columns if col.startswith("wavelength_")]
    for col in wavelength_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df