From bd82850ce4f1f25c31217767ca3b648387dc1217 Mon Sep 17 00:00:00 2001 From: zhanghuilai Date: Tue, 22 Jul 2025 10:27:31 +0800 Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?= =?UTF-8?q?=20/?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Chla-time-plot.py | 210 ++++++++++++++++++++++++++++++++++++++ chl-a含量图.py | 43 ++++++++ chla_distintplot.py | 101 +++++++++++++++++++ data_spectral.py | 66 ++++++++++++ exportDSData2csv.py | 238 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 658 insertions(+) create mode 100644 Chla-time-plot.py create mode 100644 chl-a含量图.py create mode 100644 chla_distintplot.py create mode 100644 data_spectral.py create mode 100644 exportDSData2csv.py diff --git a/Chla-time-plot.py b/Chla-time-plot.py new file mode 100644 index 0000000..7618091 --- /dev/null +++ b/Chla-time-plot.py @@ -0,0 +1,210 @@ +import pandas as pd +import matplotlib.pyplot as plt +import matplotlib.dates as mdates +import numpy as np +from datetime import datetime + +# 设置叶绿素a阈值 +CHLOROPHYLL_THRESHOLD = 100.0 # 大于此值的叶绿素a数据将被剔除 + +# 1. 读取CSV文件 +try: + # 尝试自动检测文件格式 + df = pd.read_csv( + r"D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\2025-07-11_2025-07-21\不去耀斑index.csv", + header=None, skip_blank_lines=True, on_bad_lines='warn') + + # 检查列数,可能有多余列 + if df.shape[1] > 2: + print(f"警告:检测到 {df.shape[1]} 列,但预期只有2列。尝试提取前两列。") + df = df.iloc[:, :2] # 只保留前两列 + + # 重命名列 + df.columns = ['Time', 'Chl_a'] + + # 打印前几行用于调试 + print("原始数据预览:") + print(df.head()) + + # 检查是否有标题行 + if not isinstance(df['Time'].iloc[0], str) or any(char.isalpha() for char in str(df['Time'].iloc[0])): + print("检测到可能的标题行,跳过第一行") + df = pd.read_csv( + r"D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\2025-07-11_2025-07-21\不去耀斑index.csv", + header=None, skiprows=1, skip_blank_lines=True) + df = df.iloc[:, :2] # 只保留前两列 + df.columns = ['Time', 'Chl_a'] + print("处理后数据预览:") + print(df.head()) + +except Exception as e: + print(f"读取文件时出错: {e}") + exit() + +# 2. 转换时间格式 +try: + # 尝试解析时间 + df['Time'] = pd.to_datetime(df['Time'], errors='coerce', format='%Y/%m/%d %H:%M') + + # 检查是否有解析失败的时间 + if df['Time'].isna().any(): + print("部分时间解析失败,尝试其他格式...") + # 尝试其他常见格式 + df['Time'] = pd.to_datetime(df['Time'], errors='coerce', format='%Y-%m-%d %H:%M') + df['Time'] = pd.to_datetime(df['Time'], errors='coerce', format='%Y/%m/%d %H:%M:%S') + df['Time'] = pd.to_datetime(df['Time'], errors='coerce', format='%Y-%m-%d %H:%M:%S') + + # 如果仍有失败,尝试自动推断 + if df['Time'].isna().any(): + print("使用混合格式解析时间") + df['Time'] = pd.to_datetime(df['Time'], errors='coerce', format='mixed') + + # 删除无法解析时间的行 + na_count = df['Time'].isna().sum() + if na_count > 0: + print(f"警告: 删除了 {na_count} 行无法解析的时间数据") + df = df.dropna(subset=['Time']) + + # 提取日期和时间 + df['Date'] = df['Time'].dt.date + df['Time_of_day'] = df['Time'].dt.time + +except Exception as e: + print(f"时间解析错误: {e}") + exit() + +# 3. 确保叶绿素a列是数值型并应用阈值过滤 +try: + df['Chl_a'] = pd.to_numeric(df['Chl_a'], errors='coerce') + + # 记录过滤前的数据量 + original_count = len(df) + + # 应用阈值过滤 + above_threshold = df[df['Chl_a'] > CHLOROPHYLL_THRESHOLD] + if not above_threshold.empty: + print(f"警告: 发现 {len(above_threshold)} 行叶绿素a值超过阈值 {CHLOROPHYLL_THRESHOLD} μg/L,将被剔除") + print(above_threshold) + df = df[df['Chl_a'] <= CHLOROPHYLL_THRESHOLD] + + # 删除无效值 + na_count = df['Chl_a'].isna().sum() + if na_count > 0: + print(f"警告: 删除了 {na_count} 行无效的叶绿素值") + df = df.dropna(subset=['Chl_a']) + + # 报告过滤结果 + filtered_count = len(df) + print(f"数据过滤结果: 原始 {original_count} 行 → 保留 {filtered_count} 行") + +except Exception as e: + print(f"叶绿素值转换错误: {e}") + exit() + +# 4. 按日期分组计算统计量 +daily_stats = df.groupby('Date')['Chl_a'].agg(['mean', 'std', 'count']).reset_index() +daily_stats.columns = ['Date', 'Daily_Mean', 'Std_Dev', 'Sample_Count'] + +# 获取唯一日期 +unique_dates = sorted(df['Date'].unique()) + +# 5. 创建绘图 - 箱型图 +fig1, ax1 = plt.subplots(figsize=(14, 6)) + +# 绘制箱型图 +ax1.boxplot([df[df['Date'] == date]['Chl_a'] for date in unique_dates], labels=[str(date) for date in unique_dates]) + +# 添加平均线 +ax1.plot(range(1, len(unique_dates) + 1), daily_stats['Daily_Mean'], color='red', linestyle='--', label='Daily Mean') + +ax1.set_title('Daily Chlorophyll-a Concentration with Box Plot', fontsize=14) +ax1.set_ylabel('Chl-a (μg/L)', fontsize=12) +ax1.grid(axis='y', linestyle='--', alpha=0.7) + +# 添加图例 +ax1.legend() + +plt.tight_layout() +plt.savefig( + r'D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\2025-07-11_2025-07-21\plot\daily_chlorophyll_boxplot.png', + dpi=300, bbox_inches='tight') + +# +# # 6. 创建每日时间序列图 - 每个日期一个子图 +# fig2, axs = plt.subplots(len(unique_dates), 1, figsize=(14, 4 * len(unique_dates)), sharex=True) +# +# if len(unique_dates) == 1: +# axs = [axs] # 确保单个子图时也能正确处理 +# +# fig2.suptitle('Chlorophyll-a Concentration Variation Throughout the Day', fontsize=16, y=0.98) +# +# for i, date in enumerate(unique_dates): +# daily_data = df[df['Date'] == date].sort_values('Time') +# ax = axs[i] +# +# # 使用真实时间作横轴 +# ax.plot(daily_data['Time'], daily_data['Chl_a'], marker='', linestyle='-', color='royalblue', +# label=f'Chl-a on {date}', alpha=0.8) +# +# # 添加统计信息 +# daily_mean = daily_data['Chl_a'].mean() +# daily_std = daily_data['Chl_a'].std() +# ax.axhline(y=daily_mean, color='g', linestyle='--', alpha=0.7) +# ax.axhspan(daily_mean - daily_std, daily_mean + daily_std, color='lightgreen', alpha=0.3) +# +# ax.set_title(f"{date.strftime('%Y-%m-%d')} (n={len(daily_data)})", fontsize=12) +# ax.set_ylabel('Chl-a (μg/L)', fontsize=10) +# ax.grid(True, linestyle='--', alpha=0.7) +# +# ax.legend(loc='upper left') +# +# # 自动优化时间刻度显示 +# fig2.autofmt_xdate(rotation=45) +# +# # 保存每个日期的图像,确保文件名唯一 +# save_path = f'D:/WQ/zhanghuilai/hyperspectral-inversion/data/input/一代高光谱/2025-07-11_2025-07-21/plot/daily_chlorophyll_timeseries_{date.strftime("%Y-%m-%d")}.png' +# fig2.savefig(save_path, dpi=300, bbox_inches='tight') + +# 6. 为每个日期单独创建图像 +for date in unique_dates: + daily_data = df[df['Date'] == date].sort_values('Time') + + fig, ax = plt.subplots(figsize=(14, 4)) + + # 使用真实时间作横轴 + ax.plot(daily_data['Time'], daily_data['Chl_a'], marker='o', linestyle='-', color='royalblue', + label=f'Chl-a on {date}', alpha=0.8) + + # 添加统计信息 + daily_mean = daily_data['Chl_a'].mean() + daily_std = daily_data['Chl_a'].std() + ax.axhline(y=daily_mean, color='g', linestyle='--', alpha=0.7, label='Daily Mean') + ax.axhspan(daily_mean - daily_std, daily_mean + daily_std, color='lightgreen', alpha=0.3, label='±1 Std Dev') + + ax.set_title(f"Chlorophyll-a Time Series on {date.strftime('%Y-%m-%d')} (n={len(daily_data)})", fontsize=13) + ax.set_xlabel("Time", fontsize=11) + ax.set_ylabel("Chl-a (μg/L)", fontsize=11) + ax.grid(True, linestyle='--', alpha=0.7) + + # 设置时间格式优化显示 + ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M')) + fig.autofmt_xdate(rotation=45) + + ax.legend(loc='upper left') + + # 保存图像 + save_path = f'D:/WQ/zhanghuilai/hyperspectral-inversion/data/input/一代高光谱/2025-07-11_2025-07-21/plot/daily_chlorophyll_timeseries_{date.strftime("%Y-%m-%d")}.png' + fig.savefig(save_path, dpi=300, bbox_inches='tight') + plt.close(fig) # 防止内存过多占用 + +# 打印数据统计摘要 +print("\n数据统计摘要:") +print(f"叶绿素a阈值: {CHLOROPHYLL_THRESHOLD} μg/L") +print(f"总样本数: {len(df)}") +print(f"监测天数: {len(unique_dates)}") +print(f"平均每日样本数: {len(df) / len(unique_dates):.1f}") +print(f"叶绿素a总体均值: {df['Chl_a'].mean():.2f} ± {df['Chl_a'].std():.2f} μg/L") +print(f"叶绿素a最小值: {df['Chl_a'].min():.2f} μg/L") +print(f"叶绿素a最大值: {df['Chl_a'].max():.2f} μg/L") +print("\n每日统计:") +print(daily_stats) diff --git a/chl-a含量图.py b/chl-a含量图.py new file mode 100644 index 0000000..1749cbc --- /dev/null +++ b/chl-a含量图.py @@ -0,0 +1,43 @@ +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib import font_manager + +# 1. 设置中文字体(SimHei 或自行替换路径) +font_path = 'C:/Windows/Fonts/simhei.ttf' # 适用于 Windows +zh_font = font_manager.FontProperties(fname=font_path) +plt.rcParams['font.sans-serif'] = [zh_font.get_name()] +plt.rcParams['axes.unicode_minus'] = False # 解决负号乱码 + +# 2. 读取 CSV 数据 +file_path = r"D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\output\chla画图.csv" # ← 替换为你的实际文件路径 +df = pd.read_csv(file_path) + +# 3. 预处理数据 +df.columns = ['timestamp', 'chl_a'] +df['timestamp'] = pd.to_datetime(df['timestamp']) +df['date'] = df['timestamp'].dt.date # 只保留年月日 + +# 4. 计算每日平均和标准差 +daily_stats = df.groupby('date')['chl_a'].agg(['mean', 'std']).reset_index() + +# 5. 绘图 +fig, axs = plt.subplots(2, 1, figsize=(10, 8)) + +# 图 1:每日 chl-a 浓度柱状图(含误差线) +axs[0].bar(daily_stats['date'], daily_stats['mean'], yerr=daily_stats['std'], + capsize=4, color='skyblue', edgecolor='black') +axs[0].set_title('每日 chl-a 浓度均值(含标准差)', fontproperties=zh_font) +axs[0].set_xlabel('日期', fontproperties=zh_font) +axs[0].set_ylabel('chl-a 浓度', fontproperties=zh_font) +axs[0].tick_params(axis='x', rotation=45) + +# 图 2:chl-a 浓度分布直方图 +axs[1].hist(df['chl_a'], bins=30, color='lightgreen', edgecolor='black') +axs[1].set_title('chl-a 浓度分布直方图', fontproperties=zh_font) +axs[1].set_xlabel('chl-a 浓度', fontproperties=zh_font) +axs[1].set_ylabel('频数', fontproperties=zh_font) + +# 6. 调整布局并保存图像 +plt.tight_layout() +plt.savefig('D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\output\chl_a_图表.png', dpi=300, bbox_inches='tight') # 高分辨率保存 +plt.show() diff --git a/chla_distintplot.py b/chla_distintplot.py new file mode 100644 index 0000000..44d5e7e --- /dev/null +++ b/chla_distintplot.py @@ -0,0 +1,101 @@ +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt + + +def plot_spectra(csv_file_path, chla_file_path): + """ + 读取CSV文件并根据chl-a指数是否为0将光谱分为两部分绘制 + 用红线标出650.88nm, 670.41nm, 706.54nm波长位置 + :param csv_file_path: 原始光谱数据CSV文件路径 + :param chla_file_path: 包含chl-a指数的CSV文件路径 + """ + # 读取原始光谱数据 + df_spectral = pd.read_csv(csv_file_path) + wavelengths = df_spectral.columns[1:].astype(float) # 假设从第2列开始是光谱数据 + + # 读取包含chl-a指数的结果文件 + df_chla = pd.read_csv(chla_file_path) + + # 确保两个文件的行数匹配 + if len(df_spectral) != len(df_chla): + raise ValueError("光谱数据文件和水质参数文件行数不匹配") + + # 分离光谱数据 + spectral_data = df_spectral.iloc[:, 1:].values.astype(float) + + # 获取chl-a列 + chla_values = df_chla['chl-a'].values + + # 根据chl-a值创建分组 + group1_indices = np.where(chla_values == 0)[0] # chl-a=0的索引 + group2_indices = np.where(chla_values != 0)[0] # chl-a≠0的索引 + + # 创建两个图表 + plt.figure(figsize=(15, 10)) + + # 定义要标记的波长位置 + highlight_wavelengths = [650.88, 670.41, 706.54] + colors = ['red', 'green', 'blue'] # 为每个波长使用不同的颜色 + + # 第一组:chl-a=0 + ax1 = plt.subplot(2, 1, 1) + if len(group1_indices) > 0: + group1_data = spectral_data[group1_indices, :] + for i in range(group1_data.shape[0]): + plt.plot(wavelengths, group1_data[i, :], linewidth=1, alpha=0.7) + + mean_spectrum = np.mean(group1_data, axis=0) + plt.plot(wavelengths, mean_spectrum, 'k-', linewidth=2.5, label='Average Spectrum') + + # 添加波长标记线 + y_min, y_max = plt.ylim() + for i, wave in enumerate(highlight_wavelengths): + plt.axvline(x=wave, color=colors[i], linestyle='--', alpha=0.7, + label=f'{wave}nm' if i == 0 else '') + plt.text(wave, y_max * 0.95, f'{wave}nm', + fontsize=10, color=colors[i], ha='center', va='top') + + plt.title(f'Spectral Data with chl-a = 0 (n={len(group1_indices)})') + plt.ylabel('Reflectance') + plt.legend() + plt.grid(True, linestyle='--', alpha=0.3) + + # 第二组:chl-a≠0 + ax2 = plt.subplot(2, 1, 2, sharex=ax1) # 共享x轴范围 + if len(group2_indices) > 0: + group2_data = spectral_data[group2_indices, :] + for i in range(group2_data.shape[0]): + plt.plot(wavelengths, group2_data[i, :], linewidth=1, alpha=0.7) + + mean_spectrum = np.mean(group2_data, axis=0) + plt.plot(wavelengths, mean_spectrum, 'k-', linewidth=2.5, label='Average Spectrum') + + # 添加波长标记线 + y_min, y_max = plt.ylim() + for i, wave in enumerate(highlight_wavelengths): + plt.axvline(x=wave, color=colors[i], linestyle='--', alpha=0.7, + label=f'{wave}nm' if i == 0 else '') + plt.text(wave, y_max * 0.95, f'{wave}nm', + fontsize=10, color=colors[i], ha='center', va='top') + + plt.title(f'Spectral Data with chl-a ≠ 0 (n={len(group2_indices)})') + plt.xlabel('Wavelength (nm)') + plt.ylabel('Reflectance') + plt.legend() + plt.grid(True, linestyle='--', alpha=0.3) + + # 调整布局并保存 + plt.tight_layout() + plt.savefig( + r'D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\2025-07-11_2025-07-21\plot\spectral_groups_highlighted.png', + dpi=300 + ) + plt.show() + + +# 使用示例 +if __name__ == "__main__": + spectral_csv = r"D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\2025-07-11_2025-07-21\筛选.csv" + chla_csv = r"D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\2025-07-11_2025-07-21\不去耀斑index.csv" + plot_spectra(spectral_csv, chla_csv) \ No newline at end of file diff --git a/data_spectral.py b/data_spectral.py new file mode 100644 index 0000000..9542fed --- /dev/null +++ b/data_spectral.py @@ -0,0 +1,66 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np +from datetime import datetime + +# 读取CSV文件 +file_path = r"D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\2025-07-11_2025-07-21\筛选.csv" # 替换为你的文件路径 +df = pd.read_csv(file_path) + +# 获取第一列名(时间戳列) +timestamp_col = df.columns[0] + +# 解析时间戳列(注意格式:2025/7/7 15:03) +df[timestamp_col] = pd.to_datetime(df[timestamp_col], format='%Y-%m-%d %H:%M:%S') + +# 提取日期列(用于分组) +df['Date'] = df[timestamp_col].dt.date + +# 获取波长数据(第一行,跳过时间戳列) +# 尝试将列名转换为浮点数,如果失败则保持原样 +wavelengths = [] +for col in df.columns[1:-1]: + try: + wavelengths.append(float(col)) + except ValueError: + wavelengths.append(col) + +# 按日期分组计算平均光谱 +daily_avg = df.groupby('Date')[df.columns[1:-1]].mean() + +# 设置绘图参数 +plt.figure(figsize=(12, 8)) +plt.title('Daily Average Spectral Reflectance', fontsize=16) +plt.xlabel('Wavelength (nm)', fontsize=14) +plt.ylabel('Reflectance', fontsize=14) +plt.grid(True, alpha=0.3) + +# 为每条曲线生成不同的颜色 +colors = plt.cm.viridis(np.linspace(0, 1, len(daily_avg))) + +# 绘制每条平均光谱曲线 +for i, (date, row) in enumerate(daily_avg.iterrows()): + # 将日期格式化为更易读的形式 + formatted_date = date.strftime('%Y-%m-%d') + + # 绘制光谱曲线 + plt.plot(wavelengths, row.values, + label=formatted_date, + color=colors[i], + linewidth=2) + +# 添加图例 +plt.legend(title='Date', bbox_to_anchor=(1.05, 1), loc='upper left') + +# 优化布局 +plt.tight_layout() + +# 保存图像 +plt.savefig(r'D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\2025-07-11_2025-07-21\plot\daily_average_spectra.png', dpi=300, bbox_inches='tight') + + + +# 输出每日平均光谱数据到CSV +# daily_avg.to_csv('D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\daily_average_spectra.csv') +print("每日平均光谱数据已保存到 daily_average_spectra.csv") +print("光谱图已保存到 daily_average_spectra.png") \ No newline at end of file diff --git a/exportDSData2csv.py b/exportDSData2csv.py new file mode 100644 index 0000000..2e08c99 --- /dev/null +++ b/exportDSData2csv.py @@ -0,0 +1,238 @@ +import struct +import pandas as pd +import csv +import argparse +import sys + + +def parse_metadata(hex_data): + """ + 解析元数据部分(前112个字符) + 元数据结构: + type (1字节) : 位置 0-1 + direction (1字节) : 位置 2-3 + tuigan_stat (1字节) : 位置 4-5 + year (1字节) : 位置 6-7 + month (1字节) : 位置 8-9 + day (1字节) : 位置 10-11 + hour (1字节) : 位置 12-13 + minute (1字节) : 位置 14-15 + second (1字节) : 位置 16-17 + NCa (1字节) : 位置 18-19 + Ncb (1字节) : 位置 20-21 + NCC (1字节) : 位置 22-23 + shutter_time (4字节): 位置 24-31 + index (8字节) : 位置 32-47 + temperature (32字节): 位置 48-111 (8个float,每个4字节) + """ + # 验证元数据长度 + if len(hex_data) < 112: + raise ValueError(f"元数据长度不足112个字符,实际长度: {len(hex_data)}") + + # 解析单字节整数 (u_int8_t) + def parse_u8(hex_str): + return int(hex_str, 16) + + # 解析多字节整数 (小端序) + def parse_u32(hex_str): + return struct.unpack('