Files
hyperspectral-hongshengrese…/Chla-time-plot.py
2025-07-22 10:27:31 +08:00

211 lines
8.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
from datetime import datetime
# 设置叶绿素a阈值
CHLOROPHYLL_THRESHOLD = 100.0 # 大于此值的叶绿素a数据将被剔除
# 1. 读取CSV文件
try:
# 尝试自动检测文件格式
df = pd.read_csv(
r"D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\2025-07-11_2025-07-21\不去耀斑index.csv",
header=None, skip_blank_lines=True, on_bad_lines='warn')
# 检查列数,可能有多余列
if df.shape[1] > 2:
print(f"警告:检测到 {df.shape[1]}但预期只有2列。尝试提取前两列。")
df = df.iloc[:, :2] # 只保留前两列
# 重命名列
df.columns = ['Time', 'Chl_a']
# 打印前几行用于调试
print("原始数据预览:")
print(df.head())
# 检查是否有标题行
if not isinstance(df['Time'].iloc[0], str) or any(char.isalpha() for char in str(df['Time'].iloc[0])):
print("检测到可能的标题行,跳过第一行")
df = pd.read_csv(
r"D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\2025-07-11_2025-07-21\不去耀斑index.csv",
header=None, skiprows=1, skip_blank_lines=True)
df = df.iloc[:, :2] # 只保留前两列
df.columns = ['Time', 'Chl_a']
print("处理后数据预览:")
print(df.head())
except Exception as e:
print(f"读取文件时出错: {e}")
exit()
# 2. 转换时间格式
try:
# 尝试解析时间
df['Time'] = pd.to_datetime(df['Time'], errors='coerce', format='%Y/%m/%d %H:%M')
# 检查是否有解析失败的时间
if df['Time'].isna().any():
print("部分时间解析失败,尝试其他格式...")
# 尝试其他常见格式
df['Time'] = pd.to_datetime(df['Time'], errors='coerce', format='%Y-%m-%d %H:%M')
df['Time'] = pd.to_datetime(df['Time'], errors='coerce', format='%Y/%m/%d %H:%M:%S')
df['Time'] = pd.to_datetime(df['Time'], errors='coerce', format='%Y-%m-%d %H:%M:%S')
# 如果仍有失败,尝试自动推断
if df['Time'].isna().any():
print("使用混合格式解析时间")
df['Time'] = pd.to_datetime(df['Time'], errors='coerce', format='mixed')
# 删除无法解析时间的行
na_count = df['Time'].isna().sum()
if na_count > 0:
print(f"警告: 删除了 {na_count} 行无法解析的时间数据")
df = df.dropna(subset=['Time'])
# 提取日期和时间
df['Date'] = df['Time'].dt.date
df['Time_of_day'] = df['Time'].dt.time
except Exception as e:
print(f"时间解析错误: {e}")
exit()
# 3. 确保叶绿素a列是数值型并应用阈值过滤
try:
df['Chl_a'] = pd.to_numeric(df['Chl_a'], errors='coerce')
# 记录过滤前的数据量
original_count = len(df)
# 应用阈值过滤
above_threshold = df[df['Chl_a'] > CHLOROPHYLL_THRESHOLD]
if not above_threshold.empty:
print(f"警告: 发现 {len(above_threshold)} 行叶绿素a值超过阈值 {CHLOROPHYLL_THRESHOLD} μg/L将被剔除")
print(above_threshold)
df = df[df['Chl_a'] <= CHLOROPHYLL_THRESHOLD]
# 删除无效值
na_count = df['Chl_a'].isna().sum()
if na_count > 0:
print(f"警告: 删除了 {na_count} 行无效的叶绿素值")
df = df.dropna(subset=['Chl_a'])
# 报告过滤结果
filtered_count = len(df)
print(f"数据过滤结果: 原始 {original_count} 行 → 保留 {filtered_count}")
except Exception as e:
print(f"叶绿素值转换错误: {e}")
exit()
# 4. 按日期分组计算统计量
daily_stats = df.groupby('Date')['Chl_a'].agg(['mean', 'std', 'count']).reset_index()
daily_stats.columns = ['Date', 'Daily_Mean', 'Std_Dev', 'Sample_Count']
# 获取唯一日期
unique_dates = sorted(df['Date'].unique())
# 5. 创建绘图 - 箱型图
fig1, ax1 = plt.subplots(figsize=(14, 6))
# 绘制箱型图
ax1.boxplot([df[df['Date'] == date]['Chl_a'] for date in unique_dates], labels=[str(date) for date in unique_dates])
# 添加平均线
ax1.plot(range(1, len(unique_dates) + 1), daily_stats['Daily_Mean'], color='red', linestyle='--', label='Daily Mean')
ax1.set_title('Daily Chlorophyll-a Concentration with Box Plot', fontsize=14)
ax1.set_ylabel('Chl-a (μg/L)', fontsize=12)
ax1.grid(axis='y', linestyle='--', alpha=0.7)
# 添加图例
ax1.legend()
plt.tight_layout()
plt.savefig(
r'D:\WQ\zhanghuilai\hyperspectral-inversion\data\input\一代高光谱\2025-07-11_2025-07-21\plot\daily_chlorophyll_boxplot.png',
dpi=300, bbox_inches='tight')
#
# # 6. 创建每日时间序列图 - 每个日期一个子图
# fig2, axs = plt.subplots(len(unique_dates), 1, figsize=(14, 4 * len(unique_dates)), sharex=True)
#
# if len(unique_dates) == 1:
# axs = [axs] # 确保单个子图时也能正确处理
#
# fig2.suptitle('Chlorophyll-a Concentration Variation Throughout the Day', fontsize=16, y=0.98)
#
# for i, date in enumerate(unique_dates):
# daily_data = df[df['Date'] == date].sort_values('Time')
# ax = axs[i]
#
# # 使用真实时间作横轴
# ax.plot(daily_data['Time'], daily_data['Chl_a'], marker='', linestyle='-', color='royalblue',
# label=f'Chl-a on {date}', alpha=0.8)
#
# # 添加统计信息
# daily_mean = daily_data['Chl_a'].mean()
# daily_std = daily_data['Chl_a'].std()
# ax.axhline(y=daily_mean, color='g', linestyle='--', alpha=0.7)
# ax.axhspan(daily_mean - daily_std, daily_mean + daily_std, color='lightgreen', alpha=0.3)
#
# ax.set_title(f"{date.strftime('%Y-%m-%d')} (n={len(daily_data)})", fontsize=12)
# ax.set_ylabel('Chl-a (μg/L)', fontsize=10)
# ax.grid(True, linestyle='--', alpha=0.7)
#
# ax.legend(loc='upper left')
#
# # 自动优化时间刻度显示
# fig2.autofmt_xdate(rotation=45)
#
# # 保存每个日期的图像,确保文件名唯一
# save_path = f'D:/WQ/zhanghuilai/hyperspectral-inversion/data/input/一代高光谱/2025-07-11_2025-07-21/plot/daily_chlorophyll_timeseries_{date.strftime("%Y-%m-%d")}.png'
# fig2.savefig(save_path, dpi=300, bbox_inches='tight')
# 6. 为每个日期单独创建图像
for date in unique_dates:
daily_data = df[df['Date'] == date].sort_values('Time')
fig, ax = plt.subplots(figsize=(14, 4))
# 使用真实时间作横轴
ax.plot(daily_data['Time'], daily_data['Chl_a'], marker='o', linestyle='-', color='royalblue',
label=f'Chl-a on {date}', alpha=0.8)
# 添加统计信息
daily_mean = daily_data['Chl_a'].mean()
daily_std = daily_data['Chl_a'].std()
ax.axhline(y=daily_mean, color='g', linestyle='--', alpha=0.7, label='Daily Mean')
ax.axhspan(daily_mean - daily_std, daily_mean + daily_std, color='lightgreen', alpha=0.3, label='±1 Std Dev')
ax.set_title(f"Chlorophyll-a Time Series on {date.strftime('%Y-%m-%d')} (n={len(daily_data)})", fontsize=13)
ax.set_xlabel("Time", fontsize=11)
ax.set_ylabel("Chl-a (μg/L)", fontsize=11)
ax.grid(True, linestyle='--', alpha=0.7)
# 设置时间格式优化显示
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=45)
ax.legend(loc='upper left')
# 保存图像
save_path = f'D:/WQ/zhanghuilai/hyperspectral-inversion/data/input/一代高光谱/2025-07-11_2025-07-21/plot/daily_chlorophyll_timeseries_{date.strftime("%Y-%m-%d")}.png'
fig.savefig(save_path, dpi=300, bbox_inches='tight')
plt.close(fig) # 防止内存过多占用
# 打印数据统计摘要
print("\n数据统计摘要:")
print(f"叶绿素a阈值: {CHLOROPHYLL_THRESHOLD} μg/L")
print(f"总样本数: {len(df)}")
print(f"监测天数: {len(unique_dates)}")
print(f"平均每日样本数: {len(df) / len(unique_dates):.1f}")
print(f"叶绿素a总体均值: {df['Chl_a'].mean():.2f} ± {df['Chl_a'].std():.2f} μg/L")
print(f"叶绿素a最小值: {df['Chl_a'].min():.2f} μg/L")
print(f"叶绿素a最大值: {df['Chl_a'].max():.2f} μg/L")
print("\n每日统计:")
print(daily_stats)