import pandas as pd

# Scalar shape descriptors that are copied into the output record when present.
_SHAPE_FEATURES = (
    'area', 'convex_hull_area', 'solidity', 'perimeter', 'width',
    'height', 'circularity', 'shape_factor', 'aspect_ratio',
)

# Every spectral value is divided by this constant before being stored.
# NOTE(review): presumably the raw reflectance is stored scaled by 10^4 --
# confirm against the data source. The wavelength label is NOT unit-converted.
_REFLECTANCE_SCALE = 10000

# A contour is only kept when it has strictly more points than this.
_MIN_CONTOUR_POINTS = 3


def _extract_spectral_columns(sample_data):
    """Return ``{"wavelength_<label>": value / 10000}`` for one sample."""
    if "wavelength_means" not in sample_data:
        return {}
    spectral_data = sample_data["wavelength_means"]
    if "label" not in spectral_data or "value" not in spectral_data:
        return {}

    wavelengths = spectral_data["label"]
    reflectance = spectral_data["value"]
    # Labels and values are paired by position, so a length mismatch means
    # the pairing cannot be trusted; the whole spectrum is skipped.
    if len(wavelengths) != len(reflectance):
        return {}

    columns = {}
    for wl, value in zip(wavelengths, reflectance):
        try:
            # float() lets numeric-string labels (e.g. "600.5") through; the
            # ".2f" format spec alone would raise ValueError on a str.
            columns[f"wavelength_{float(wl):.2f}"] = value / _REFLECTANCE_SCALE
        except (ValueError, TypeError):
            # Best effort: a non-numeric label or value drops only this band.
            continue
    return columns


def _extract_shape_columns(sample_data):
    """Return the scalar shape features and the contour for one sample."""
    columns = {}
    for feature in _SHAPE_FEATURES:
        if feature in sample_data:
            feature_data = sample_data[feature]
            if "value" in feature_data:
                columns[feature] = feature_data["value"]

    if 'contour' in sample_data:
        feature_data = sample_data['contour']
        # A missing or empty contour is ignored without a warning.
        if "value" in feature_data and feature_data["value"]:
            # Expected to be a sequence of points [[x1, y1], [x2, y2], ...].
            contour_points = feature_data["value"]
            if len(contour_points) <= _MIN_CONTOUR_POINTS:
                print("警告:轮廓数据点数少于或等于3个,跳过该数据")
            else:
                # Stored as a list of (x, y) tuples for later processing.
                columns['contour'] = [
                    (point[0], point[1]) for point in contour_points
                ]
    return columns


def process_plantcv_outputs(observations: dict) -> pd.DataFrame:
    """
    Build a DataFrame of spectral and shape features from in-memory
    PlantCV observations.

    Each sample becomes one row. The row always has a ``"Sample ID"`` column;
    ``wavelength_<label>`` columns, shape-feature columns and a ``contour``
    column are added only when the corresponding observation is present.

    :param observations: mapping of sample id to that sample's observation
        dict (the ``pcv.outputs.observations`` structure); each observation
        is itself a dict that may hold ``"label"`` and ``"value"`` entries
    :return: DataFrame with one row per sample; every ``wavelength_*`` column
        is coerced to a numeric dtype (unparseable entries become NaN)
    """
    all_samples = []
    for sample_id, sample_data in observations.items():
        sample_record = {"Sample ID": sample_id}
        sample_record.update(_extract_spectral_columns(sample_data))
        sample_record.update(_extract_shape_columns(sample_data))
        all_samples.append(sample_record)

    df = pd.DataFrame(all_samples)

    # Make sure every wavelength column is numeric, even if a stray
    # non-numeric value ended up in it.
    wavelength_cols = [col for col in df.columns if col.startswith("wavelength_")]
    for col in wavelength_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    return df