初始提交
This commit is contained in:
76
outputs2dataframe.py
Normal file
76
outputs2dataframe.py
Normal file
@ -0,0 +1,76 @@
|
||||
import pandas as pd
|
||||
|
||||
# Scalar shape variables copied verbatim from each sample, in output column order.
_SHAPE_FEATURES = (
    'area', 'convex_hull_area', 'solidity', 'perimeter',
    'width', 'height', 'circularity', 'shape_factor', 'aspect_ratio',
)


def process_plantcv_outputs(observations, *, reflectance_scale=10000):
    """Flatten in-memory PlantCV observations into one DataFrame row per sample.

    :param observations: mapping of sample id -> {variable name -> entry},
        where each entry is a mapping that may hold a ``"value"`` key and,
        for ``"wavelength_means"``, a parallel ``"label"`` key (the
        ``pcv.outputs.observations`` structure).
    :param reflectance_scale: divisor applied to every spectral value before
        it is stored. Defaults to 10000, the constant this function has
        always used. Passing 0 raises ``ZeroDivisionError``.
    :return: DataFrame with a ``"Sample ID"`` column, one
        ``wavelength_<label, 2 decimals>`` column per spectral band, one
        column per available shape feature, and a ``"contour"`` column
        holding a list of ``(x, y)`` tuples. Values missing for a sample
        are NaN. Wavelength columns are coerced to numeric dtype.
    """
    records = [
        _build_sample_record(sample_id, sample_data, reflectance_scale)
        for sample_id, sample_data in observations.items()
    ]
    df = pd.DataFrame(records)

    # Force every spectral column to a numeric dtype; unparsable cells become NaN.
    wavelength_cols = [col for col in df.columns if col.startswith("wavelength_")]
    for col in wavelength_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    return df


def _build_sample_record(sample_id, sample_data, reflectance_scale):
    """Assemble the flat record (one future DataFrame row) for one sample."""
    # Insertion order fixes the column order: id, spectral, shape, contour.
    record = {"Sample ID": sample_id}
    record.update(_spectral_columns(sample_data, reflectance_scale))
    record.update(_shape_columns(sample_data))

    contour = _contour_points(sample_data)
    if contour is not None:
        record['contour'] = contour
    return record


def _spectral_columns(sample_data, reflectance_scale):
    """Return the ``wavelength_*`` columns of one sample ({} when unusable)."""
    if "wavelength_means" not in sample_data:
        return {}

    spectral_data = sample_data["wavelength_means"]
    if "label" not in spectral_data or "value" not in spectral_data:
        return {}

    wavelengths = spectral_data["label"]
    reflectance = spectral_data["value"]
    # Labels and values are paired by position; a length mismatch makes the
    # pairing meaningless, so the whole spectrum is dropped for this sample.
    if len(wavelengths) != len(reflectance):
        return {}

    columns = {}
    for wavelength, value in zip(wavelengths, reflectance):
        try:
            # NOTE(review): earlier comments described this as an nm -> um
            # wavelength conversion, but only the reflectance value is
            # scaled; the wavelength label is used unchanged in the column
            # name. The divisor is presumably the sensor's reflectance
            # scaling -- confirm with the data source.
            columns[f"wavelength_{wavelength:.2f}"] = value / reflectance_scale
        except (ValueError, TypeError):
            # Non-numeric label or value: skip this band, keep the others.
            continue
    return columns


def _shape_columns(sample_data):
    """Return the scalar shape features present in one sample."""
    columns = {}
    for feature in _SHAPE_FEATURES:
        if feature not in sample_data:
            continue
        feature_data = sample_data[feature]
        if "value" in feature_data:
            columns[feature] = feature_data["value"]
    return columns


def _contour_points(sample_data):
    """Return the contour as a list of ``(x, y)`` tuples, or None if unusable."""
    if 'contour' not in sample_data:
        return None

    feature_data = sample_data['contour']
    if "value" not in feature_data:
        return None

    # Contour data is expected to be a sequence of points [[x1, y1], [x2, y2], ...].
    points = feature_data["value"]
    if points is None:
        return None
    try:
        # Use len() rather than truthiness: array-like containers (e.g. NumPy
        # arrays) raise ValueError when evaluated as a boolean.
        point_count = len(points)
    except TypeError:
        return None
    if point_count == 0:
        return None

    if point_count <= 3:
        # Contours with three or fewer points are skipped, with a warning.
        print("警告:轮廓数据点数少于或等于3个,跳过该数据")
        return None

    # Stored as a list of tuples for downstream processing.
    return [(point[0], point[1]) for point in points]
|
||||
|
||||
Reference in New Issue
Block a user