初始提交

2026-02-25 09:42:51 +08:00
parent c25276c481
commit d84d886f35
182 changed files with 18438 additions and 0 deletions
--- a/outputs2dataframe.py
+++ b/outputs2dataframe.py
@@ -0,0 +1,76 @@
+import pandas as pd
+
+def process_plantcv_outputs(observations):
+    """
+    Flatten PlantCV in-memory observations into a DataFrame with one row per sample.
+    :param observations: mapping of sample ID to that sample's observation dict (the pcv.outputs.observations structure)
+    Each row holds "Sample ID", one "wavelength_<label>" column per spectral band, any shape features present, and an optional "contour".
+    :return: DataFrame combining the spectral and shape features; wavelength columns are coerced to numeric
+    """
+    all_samples = []
+
+    # Walk the observation data of every sample
+    for sample_id, sample_data in observations.items():
+        # Start the row for this sample with its identifier
+        sample_record = {
+            "Sample ID": sample_id,
+        }
+
+        # Spectral data (only when the sample has a "wavelength_means" entry)
+        if "wavelength_means" in sample_data:
+            spectral_data = sample_data["wavelength_means"]
+
+            # Extract the wavelength labels and their values
+            if "label" in spectral_data and "value" in spectral_data:
+                wavelengths = spectral_data["label"]
+                reflectance = spectral_data["value"]
+
+                # Use the spectrum only when labels and values have the same length; a mismatch is skipped silently
+                if len(wavelengths) == len(reflectance):
+                    # Create one column per wavelength, named from the label formatted with two decimals
+                    for i, wl in enumerate(wavelengths):
+                        try:
+                            # NOTE(review): the value (not the wavelength) is divided by 10000; an earlier nm -> um remark did not match the code - confirm the intended scaling
+                            sample_record[f"wavelength_{wl:.2f}"] = reflectance[i] / 10000
+                        except (ValueError, TypeError):  # formatting the label or scaling the value failed
+                            continue  # drop just this band and keep going
+
+        # Shape features
+        # Contour handling is folded into the same feature loop below
+        shape_features = [
+            'area', 'convex_hull_area', 'solidity', 'perimeter',
+            'width', 'height', 'circularity', 'shape_factor', 'aspect_ratio'
+        ]
+
+        # Contour feature
+        contour_features = ['contour']
+
+        for feature in shape_features  + contour_features:
+            if feature in sample_data:
+                feature_data = sample_data[feature]
+
+                if feature == 'contour':  # contour data needs special handling
+                    if "value" in feature_data and feature_data["value"]:
+                        # Contour data is expected to be a list of points [[x1, y1], [x2, y2], ...]
+                        contour_points = feature_data["value"]
+                        # Skip the contour when it has 3 or fewer points
+                        if len(contour_points) <= 3:
+                            print(f"警告：轮廓数据点数少于或等于3个，跳过该数据")
+                            continue  # skip this data and go straight to the next feature
+                        # Store as a list of (x, y) tuples for later processing
+                        sample_record['contour'] = [(point[0], point[1]) for point in contour_points]
+                else:  # all other shape features are copied through unchanged
+                    if "value" in feature_data:
+                        sample_record[feature] = feature_data["value"]
+        all_samples.append(sample_record)
+
+    # Build the DataFrame (one row per sample record)
+    df = pd.DataFrame(all_samples)
+
+    # Make sure every wavelength column is numeric; errors='coerce' turns unparseable entries into NaN
+    wavelength_cols = [col for col in df.columns if col.startswith("wavelength_")]
+    for col in wavelength_cols:
+        df[col] = pd.to_numeric(df[col], errors='coerce')
+
+    return df
+