77 lines
3.1 KiB
Python
77 lines
3.1 KiB
Python
import pandas as pd
|
||
|
||
# Scalar shape descriptors copied into the record, in this column order,
# whenever the sample provides them.
_SHAPE_FEATURES = (
    "area", "convex_hull_area", "solidity", "perimeter",
    "width", "height", "circularity", "shape_factor", "aspect_ratio",
)

# Every raw spectral value is divided by this before being stored.
_REFLECTANCE_SCALE = 10000

# Contours with this many points or fewer are discarded with a warning.
_MIN_CONTOUR_POINTS = 3


def _spectral_columns(sample_data):
    """Return the ``wavelength_<wl>`` columns for one sample (possibly empty)."""
    if "wavelength_means" not in sample_data:
        return {}
    spectral = sample_data["wavelength_means"]
    if "label" not in spectral or "value" not in spectral:
        return {}

    labels = spectral["label"]
    values = spectral["value"]
    # Labels and values are paired positionally; a length mismatch means the
    # pairing cannot be trusted, so the whole spectrum is skipped.
    if len(labels) != len(values):
        return {}

    columns = {}
    for label, value in zip(labels, values):
        try:
            # float() lets numeric-string labels ("550") through; formatting
            # them directly with ":.2f" would raise and drop the band.
            # NOTE(review): the wavelength itself is not unit-converted; only
            # the value is scaled by 1/10000. Confirm this is the intended
            # scaling (an earlier comment described an nm -> um conversion).
            columns[f"wavelength_{float(label):.2f}"] = value / _REFLECTANCE_SCALE
        except (ValueError, TypeError):
            # Non-numeric label or non-numeric value: skip just this band.
            continue
    return columns


def _shape_columns(sample_data):
    """Return the shape-feature and contour columns for one sample."""
    columns = {}
    for feature in _SHAPE_FEATURES:
        if feature not in sample_data:
            continue
        feature_data = sample_data[feature]
        if "value" in feature_data:
            columns[feature] = feature_data["value"]

    # The contour is handled last so that its column follows the scalar
    # shape features.
    if "contour" in sample_data:
        contour_data = sample_data["contour"]
        if "value" in contour_data and contour_data["value"]:
            # Expected to be a sequence of points such as [[x1, y1], [x2, y2], ...].
            contour_points = contour_data["value"]
            if len(contour_points) <= _MIN_CONTOUR_POINTS:
                print("警告:轮廓数据点数少于或等于3个,跳过该数据")
            else:
                # Stored as a list of (x, y) tuples for downstream processing.
                columns["contour"] = [(point[0], point[1]) for point in contour_points]
    return columns


def process_plantcv_outputs(observations):
    """Flatten in-memory PlantCV observations into one DataFrame row per sample.

    :param observations: mapping of sample id to that sample's observation
        dict, where each observation is itself a dict that may carry
        ``"label"`` and ``"value"`` entries (e.g. ``pcv.outputs.observations``).
    :return: ``pandas.DataFrame`` with a ``"Sample ID"`` column, one
        ``wavelength_<wl>`` column per spectral band (value divided by 10000,
        ``<wl>`` formatted with two decimals), one column per available shape
        feature, and a ``"contour"`` column holding a list of ``(x, y)``
        tuples. Cells for data a sample does not provide are NaN. An empty
        ``observations`` mapping yields an empty DataFrame.
    """
    all_samples = []
    for sample_id, sample_data in observations.items():
        sample_record = {"Sample ID": sample_id}
        sample_record.update(_spectral_columns(sample_data))
        sample_record.update(_shape_columns(sample_data))
        all_samples.append(sample_record)

    df = pd.DataFrame(all_samples)

    # Force every spectral column to a numeric dtype; anything that cannot be
    # parsed becomes NaN instead of leaving an object-typed column.
    wavelength_cols = [col for col in df.columns if col.startswith("wavelength_")]
    for col in wavelength_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    return df
|
||
|