1858 lines
89 KiB
Python
1858 lines
89 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
水质参数反演分析 Word 报告生成模块
|
||
根据 visualization_reports.py 生成的图片,自动生成结构化 Word 报告
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import base64
|
||
import json
|
||
from dataclasses import dataclass
|
||
from pathlib import Path
|
||
from typing import Dict, List, Optional, Any
|
||
from datetime import datetime
|
||
from urllib.request import Request, urlopen
|
||
from urllib.error import URLError, HTTPError
|
||
from docx import Document
|
||
from docx.shared import Inches, Pt, Cm
|
||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||
from docx.enum.section import WD_SECTION
|
||
from docx.oxml.ns import qn
|
||
|
||
|
||
def get_resource_path(relative_path: str) -> str:
    """Return the filesystem path of a bundled resource.

    Inside a PyInstaller bundle (``sys._MEIPASS`` is present) the relative
    path is joined onto the bundle's extraction directory. Otherwise it is
    joined onto the directory obtained by applying ``os.path.dirname`` three
    times to this file's path, and the result is made absolute.
    """
    if not hasattr(sys, '_MEIPASS'):
        source_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
        return os.path.abspath(os.path.join(source_root, relative_path))
    return os.path.join(sys._MEIPASS, relative_path)
|
||
from docx.oxml import OxmlElement
|
||
from docx.shared import RGBColor
|
||
import pandas as pd
|
||
|
||
|
||
class _SimpleProgress:
|
||
"""无依赖进度条(控制台单行刷新)。"""
|
||
|
||
def __init__(self, total: int, desc: str = ""):
|
||
self.total = max(1, int(total))
|
||
self.desc = desc
|
||
self.n = 0
|
||
self._render()
|
||
|
||
def update(self, step: int = 1):
|
||
self.n = min(self.total, self.n + int(step))
|
||
self._render()
|
||
|
||
def close(self):
|
||
# 换行,避免覆盖后续输出
|
||
print()
|
||
|
||
def _render(self):
|
||
pct = int(self.n / self.total * 100)
|
||
bar_len = 30
|
||
filled = int(bar_len * self.n / self.total)
|
||
bar = "█" * filled + "·" * (bar_len - filled)
|
||
prefix = f"{self.desc} " if self.desc else ""
|
||
print(f"\r{prefix}[{bar}] {self.n}/{self.total} ({pct}%)", end="", flush=True)
|
||
|
||
|
||
@dataclass
class ReportGenerationConfig:
    """Optional overrides for report generation and AI analysis.

    Two backends are covered, Ollama and Minimax. Every field defaults to
    ``None``, meaning "not set"; the generator then falls back to environment
    variables or its own defaults.
    """

    # --- common ---
    # Backend selector: "ollama" or "minimax" (the generator defaults to "minimax").
    ai_provider: Optional[str] = None
    # Whether AI analysis is performed at all.
    enable_ai_analysis: Optional[bool] = None

    # --- Ollama only ---
    ollama_base_url: Optional[str] = None
    ollama_vision_model: Optional[str] = None
    ollama_text_model: Optional[str] = None
    ollama_timeout_s: Optional[int] = None

    # --- Minimax only ---
    minimax_api_key: Optional[str] = None
    minimax_vision_model: Optional[str] = None
    minimax_text_model: Optional[str] = None
    minimax_timeout_s: Optional[int] = None
|
||
|
||
|
||
class WaterQualityReportGenerator:
|
||
"""水质参数 Word 报告生成器"""
|
||
|
||
def __init__(
|
||
self,
|
||
output_dir: str = None,
|
||
work_dir: str = None,
|
||
ai_config: Optional[ReportGenerationConfig] = None,
|
||
):
|
||
# 设置工作目录(整个流程的核心目录,所有数据基于此)
|
||
if work_dir is None:
|
||
self.work_dir = Path("./work_dir")
|
||
else:
|
||
self.work_dir = Path(work_dir)
|
||
|
||
# 基于工作目录设置各子目录
|
||
self.visualization_dir = self.work_dir / "14_visualization"
|
||
|
||
# 设置报告保存位置:默认为可视化目录(visualization_dir)
|
||
self._output_dir_is_default = output_dir is None
|
||
if output_dir is None:
|
||
self.output_dir = self.visualization_dir
|
||
else:
|
||
self.output_dir = Path(output_dir)
|
||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||
|
||
# 设置中文字体支持
|
||
self.chinese_font = 'SimSun' # 宋体
|
||
self.title_font = 'SimHei' # 黑体
|
||
self.english_font = 'Times New Roman' # 英文
|
||
|
||
cfg = ai_config
|
||
# AI Provider 选择:默认 "minimax"
|
||
self.ai_provider = (
|
||
cfg.ai_provider
|
||
if cfg and cfg.ai_provider
|
||
else os.environ.get("AI_PROVIDER", "minimax").lower()
|
||
)
|
||
|
||
# Ollama 配置
|
||
default_url = os.environ.get("OLLAMA_URL", "http://localhost:11434").rstrip("/")
|
||
self.ollama_base_url = (
|
||
cfg.ollama_base_url.rstrip("/")
|
||
if cfg and cfg.ollama_base_url
|
||
else default_url
|
||
)
|
||
self.ollama_vision_model = (
|
||
cfg.ollama_vision_model
|
||
if cfg and cfg.ollama_vision_model
|
||
else os.environ.get("OLLAMA_VISION_MODEL", "qwen3-vl:8b")
|
||
)
|
||
self.ollama_text_model = (
|
||
cfg.ollama_text_model
|
||
if cfg and cfg.ollama_text_model
|
||
else os.environ.get("OLLAMA_TEXT_MODEL", self.ollama_vision_model)
|
||
)
|
||
self.ollama_timeout_s = (
|
||
int(cfg.ollama_timeout_s)
|
||
if cfg and cfg.ollama_timeout_s is not None
|
||
else int(os.environ.get("OLLAMA_TIMEOUT_S", "120"))
|
||
)
|
||
|
||
# Minimax 配置
|
||
self.minimax_api_key = (
|
||
cfg.minimax_api_key
|
||
if cfg and cfg.minimax_api_key
|
||
else os.environ.get("MINIMAX_API_KEY", "")
|
||
)
|
||
self.minimax_base_url = (
|
||
os.environ.get("MINIMAX_BASE_URL", "https://api.minimaxi.com/v1/text/chatcompletion_v2").rstrip("/")
|
||
)
|
||
self.minimax_vision_model = (
|
||
cfg.minimax_vision_model
|
||
if cfg and cfg.minimax_vision_model
|
||
else os.environ.get("MINIMAX_VISION_MODEL", "abab6.5s-chat")
|
||
)
|
||
self.minimax_text_model = (
|
||
cfg.minimax_text_model
|
||
if cfg and cfg.minimax_text_model
|
||
else os.environ.get("MINIMAX_TEXT_MODEL", "abab6.5s-chat")
|
||
)
|
||
self.minimax_timeout_s = (
|
||
int(cfg.minimax_timeout_s)
|
||
if cfg and cfg.minimax_timeout_s is not None
|
||
else int(os.environ.get("MINIMAX_TIMEOUT_S", "120"))
|
||
)
|
||
|
||
# 通用配置
|
||
if cfg and cfg.enable_ai_analysis is not None:
|
||
self.enable_ai_analysis = bool(cfg.enable_ai_analysis)
|
||
else:
|
||
self.enable_ai_analysis = os.environ.get("ENABLE_AI_ANALYSIS", "1") not in {
|
||
"0",
|
||
"false",
|
||
"False",
|
||
}
|
||
self.ai_cache_path = self.output_dir / "ollama_image_analyses_cache.json"
|
||
|
||
# 各参数的专业描述(完整版)
|
||
self.parameter_descriptions = {
|
||
"Chlorophyll": """叶绿素(Chlorophyll)是浮游植物进行光合作用的关键色素,直接反映水体中藻类的生物量与初级生产力水平。它是评价水体富营养化程度最常用的指标之一。当叶绿素浓度持续升高时,表明藻类大量增殖,水华风险显著增加,并可能引发溶解氧剧烈波动、水体透明度下降及底栖生态系统退化。因此,通过遥感手段反演叶绿素浓度,可为水华预警、水质改善及生态修复提供重要科学依据。""",
|
||
|
||
"COD": """化学需氧量(COD)是衡量水体中有机污染物含量的综合指标,反映单位体积水体中还原性物质(主要是有机物)被氧化所消耗的氧化剂总量。COD值越高,表明水体受有机污染越严重。高COD会加剧溶解氧消耗,导致水体缺氧、水生生物死亡,甚至引发黑臭现象。COD也是污水处理效果和污染物排放管控的关键考核指标,其时空分布可为污染源识别与治理提供直接依据。""",
|
||
|
||
"DO": """溶解氧(DO)是维持水生生态系统健康的基础物质,指溶解在水中的分子态氧。其浓度受水温、盐度、藻类光合作用及有机物耗氧过程共同调控。DO低于一定阈值会导致水生生物窒息、底泥营养盐释放及水体自净能力下降。DO的实时监测与空间分布反演,对判断水体污染程度、预警鱼类死亡事件及评估生态修复成效具有重要价值。""",
|
||
|
||
"PH": """pH值是反映水体酸碱度的无量纲参数,直接影响水中化学形态、微生物活性和水生生物的生理代谢。天然水体pH值一般介于6.5~8.5之间,当pH值过低(酸化)或过高(碱化)时,会破坏水生生态平衡,加速重金属溶出,对鱼类鳃组织及藻类群落造成胁迫。pH的时空变化可用于识别酸性废水排放、藻类暴发过程以及水化学环境的稳定性评估。""",
|
||
|
||
"Temperature": """水温(Temperature)是水体物理特性的基本参数,控制着溶解氧饱和度、化学反应速率及生物代谢强度。水温异常升高(如热污染)或昼夜温差剧烈波动,会影响鱼类洄游、藻类生长节律及底泥污染物释放。水温也是水文模型与水质模型的关键输入变量,其卫星遥感反演为大型水体热状况监测提供了高效手段。""",
|
||
|
||
"spCond": """电导率(spCond)表征水体传导电流的能力,与溶解性离子总浓度密切相关。它常用于指示水体矿化度、盐度以及受工业废水、生活污水或农业径流污染的程度。电导率的快速变化往往预示着外源污染输入或海水入侵,是水质常规监测中重要的物理参数,其空间分布图可为污染源追踪提供直观线索。""",
|
||
|
||
"Turbidity": """浊度(Turbidity)反映水体中悬浮颗粒物(如泥沙、藻类、微生物)对光线的散射程度,是衡量水体透明度的关键指标。浊度升高不仅影响水生植物光合作用,还会为病原微生物提供附着载体,干扰水处理工艺。通过遥感影像反演浊度,可实现大范围、高频次的水体清澈度评价,对饮用水源地保护和河流泥沙输送研究具有重要意义。""",
|
||
|
||
"TDS": """总溶解固体(TDS)指水中溶解性无机盐和部分有机物的总质量,与水的适口性、管道腐蚀风险及灌溉适宜性密切相关。TDS过高会导致水味苦涩,并可能伴随有害微量元素积累。在咸潮入侵、工业排放及农业面源污染研究中,TDS是评价水质变化的稳定指标,其反演结果有助于识别淡水咸化区域及制定取水策略。""",
|
||
|
||
"Cl-": """氯离子(Cl-)是天然水体中最稳定存在的阴离子之一,其来源包括岩石风化、海水侵入、工业废水及生活污水。氯离子含量升高可指示水体受咸潮或污染输入的影响,且在高浓度下会腐蚀管道、影响农业土壤结构。在饮用水消毒过程中,氯离子与有机物可能生成三氯甲烷等消毒副产物,因此其监测对水厂运行和水安全有重要警示作用。""",
|
||
|
||
"NO3-N": """硝酸盐氮(NO3-N)是氮循环中氧化程度最高的形态,易溶于水,常通过农田径流、化粪池渗漏或工业废水进入水体。过量硝酸盐会刺激藻类过度生长,加速水体富营养化;饮用水中硝酸盐氮浓度超标会引发“蓝婴症”(高铁血红蛋白血症),对婴幼儿健康构成威胁。因此,硝酸盐氮是水质评价与饮用水安全监管的重点指标。""",
|
||
|
||
"NH3-N": """氨氮(NH3-N)是水体受有机污染初期的重要指示物,主要来源于生活污水、农业化肥及工业含氮废水。氨氮对鱼类等水生生物有较强的毒性,且在好氧条件下会消耗大量溶解氧转化为硝酸盐。氨氮浓度高往往反映近期污染输入或水体自净能力不足,其动态变化可用于预警突发性污染事件和评估生态修复效果。""",
|
||
|
||
"BGA": """BGA(蓝绿藻,即蓝藻)是表征水体蓝藻生物量的关键生物参数,通常通过藻蓝蛋白等特征色素反演获得。蓝藻过量繁殖(水华)会释放藻毒素、消耗溶解氧、形成水面覆盖层,严重威胁饮用水安全和水生态系统健康。BGA浓度的空间分布能精准指示水华高发区域与迁移路径,是水华预警、蓝藻治理和生态修复措施制定不可或缺的输入信息。""",
|
||
|
||
"TT": """总氮(TT)是水体中有机氮、氨氮、硝酸盐氮、亚硝酸盐氮等各种形态氮的总和,综合反映了水体的氮营养水平。总氮是导致水体富营养化的主要限制因子之一,其浓度过高会引发藻类爆发、透明度下降、水质恶化。总氮的时空变化趋势可用于判断流域面源污染强度、评估氮减排措施成效,是水质管理和流域水环境保护的关键参考指标。"""
|
||
}
|
||
|
||
# 每个参数对应的图片顺序(统一5张图模式)
|
||
self.parameter_images = {
|
||
"Chlorophyll": [
|
||
"Chlorophyll_histogram.png",
|
||
"Chlorophyll_spectrum_comparison.png",
|
||
"Chlorophyll_scatter_with_confidence.png",
|
||
"Chlorophyll_boxplot.png",
|
||
"Chlorophyll_distribution.png"
|
||
],
|
||
"COD": [
|
||
"COD_histogram.png",
|
||
"COD_spectrum_comparison.png",
|
||
"COD_scatter_with_confidence.png",
|
||
"COD_boxplot.png",
|
||
"COD_distribution.png"
|
||
],
|
||
"DO": [
|
||
"DO_histogram.png",
|
||
"DO_spectrum_comparison.png",
|
||
"DO_scatter_with_confidence.png",
|
||
"DO_boxplot.png",
|
||
"DO_distribution.png"
|
||
],
|
||
"PH": [
|
||
"PH_histogram.png",
|
||
"PH_spectrum_comparison.png",
|
||
"PH_scatter_with_confidence.png",
|
||
"PH_boxplot.png",
|
||
"PH_distribution.png"
|
||
],
|
||
"Temperature": [
|
||
"Temperature_histogram.png",
|
||
"Temperature_spectrum_comparison.png",
|
||
"Temperature_scatter_with_confidence.png",
|
||
"Temperature_boxplot.png",
|
||
"Temperature_distribution.png"
|
||
],
|
||
"spCond": [
|
||
"spCond_histogram.png",
|
||
"spCond_spectrum_comparison.png",
|
||
"spCond_scatter_with_confidence.png",
|
||
"spCond_boxplot.png",
|
||
"spCond_distribution.png"
|
||
],
|
||
"Turbidity": [
|
||
"Turbidity_histogram.png",
|
||
"Turbidity_spectrum_comparison.png",
|
||
"Turbidity_scatter_with_confidence.png",
|
||
"Turbidity_boxplot.png",
|
||
"Turbidity_distribution.png"
|
||
],
|
||
"TDS": [
|
||
"TDS_histogram.png",
|
||
"TDS_spectrum_comparison.png",
|
||
"TDS_scatter_with_confidence.png",
|
||
"TDS_boxplot.png",
|
||
"TDS_distribution.png"
|
||
],
|
||
"Cl-": [
|
||
"Cl-histogram.png",
|
||
"Cl-spectrum_comparison.png",
|
||
"Cl-scatter_with_confidence.png",
|
||
"Cl-boxplot.png",
|
||
"Cl-distribution.png"
|
||
],
|
||
"NO3-N": [
|
||
"NO3-N_histogram.png",
|
||
"NO3-N_spectrum_comparison.png",
|
||
"NO3-N_scatter_with_confidence.png",
|
||
"NO3-N_boxplot.png",
|
||
"NO3-N_distribution.png"
|
||
],
|
||
"NH3-N": [
|
||
"NH3-N_histogram.png",
|
||
"NH3-N_spectrum_comparison.png",
|
||
"NH3-N_scatter_with_confidence.png",
|
||
"NH3-N_boxplot.png",
|
||
"NH3-N_distribution.png"
|
||
],
|
||
"BGA": [
|
||
"BGA_histogram.png",
|
||
"BGA_spectrum_comparison.png",
|
||
"BGA_scatter_with_confidence.png",
|
||
"BGA_boxplot.png",
|
||
"BGA_distribution.png"
|
||
],
|
||
"TT": [
|
||
"TT_histogram.png",
|
||
"TT_spectrum_comparison.png",
|
||
"TT_scatter_with_confidence.png",
|
||
"TT_boxplot.png",
|
||
"TT_distribution.png"
|
||
]
|
||
}
|
||
|
||
def apply_ai_config(self, ai_config: ReportGenerationConfig) -> None:
    """Overlay AI settings from ``ai_config`` onto this existing generator.

    Only fields that are set are applied: text fields must be truthy, the two
    timeouts and the enable flag must be not ``None``. The new values are
    picked up by the next ``_ai_chat`` call.
    """

    def _truthy(value) -> bool:
        return bool(value)

    def _not_none(value) -> bool:
        return value is not None

    # (attribute, "is it set?" predicate, optional conversion); applied in order.
    overlay_rules = (
        ("ai_provider", _truthy, lambda v: v.lower()),
        ("ollama_base_url", _truthy, lambda v: v.rstrip("/")),
        ("ollama_vision_model", _truthy, None),
        ("ollama_text_model", _truthy, None),
        ("ollama_timeout_s", _not_none, int),
        ("minimax_api_key", _truthy, None),
        ("minimax_vision_model", _truthy, None),
        ("minimax_text_model", _truthy, None),
        ("minimax_timeout_s", _not_none, int),
        ("enable_ai_analysis", _not_none, bool),
    )
    for attr_name, is_set, convert in overlay_rules:
        incoming = getattr(ai_config, attr_name)
        if not is_set(incoming):
            continue
        setattr(self, attr_name, incoming if convert is None else convert(incoming))
|
||
|
||
def _style_heading(self, heading, level: int):
    """Apply the title font, bold weight and a level-based size to a heading.

    Levels 1, 2 and 3 map to 16 pt, 14 pt and 12 pt; for any other level the
    size is left untouched. The East Asian font slot is set as well so CJK
    text uses the title font.
    """
    point_sizes = {1: Pt(16), 2: Pt(14), 3: Pt(12)}
    for text_run in heading.runs:
        font = text_run.font
        font.name = self.title_font
        font.bold = True
        if level in point_sizes:
            font.size = point_sizes[level]
        text_run._element.rPr.rFonts.set(qn('w:eastAsia'), self.title_font)
|
||
|
||
def _load_ai_cache(self) -> Dict[str, Any]:
|
||
if not self.ai_cache_path.exists():
|
||
return {}
|
||
try:
|
||
with open(self.ai_cache_path, "r", encoding="utf-8") as f:
|
||
return json.load(f)
|
||
except Exception:
|
||
return {}
|
||
|
||
def _save_ai_cache(self, cache: Dict[str, Any]) -> None:
|
||
try:
|
||
with open(self.ai_cache_path, "w", encoding="utf-8") as f:
|
||
json.dump(cache, f, ensure_ascii=False, indent=2)
|
||
except Exception:
|
||
pass
|
||
|
||
def _ollama_chat(self, model: str, system_prompt: str, user_prompt: str, image_path: Optional[Path] = None) -> str:
|
||
"""调用 Ollama /api/chat。image_path 传入时进行视觉分析。"""
|
||
payload: Dict[str, Any] = {
|
||
"model": model,
|
||
"stream": False,
|
||
"messages": [
|
||
{"role": "system", "content": system_prompt},
|
||
{"role": "user", "content": user_prompt},
|
||
],
|
||
}
|
||
|
||
if image_path is not None:
|
||
try:
|
||
img_b64 = base64.b64encode(image_path.read_bytes()).decode("utf-8")
|
||
payload["messages"][-1]["images"] = [img_b64]
|
||
except Exception as e:
|
||
return f"(读取图片失败:{e})"
|
||
|
||
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
|
||
req = Request(
|
||
url=f"{self.ollama_base_url}/api/chat",
|
||
data=data,
|
||
headers={"Content-Type": "application/json"},
|
||
method="POST",
|
||
)
|
||
|
||
try:
|
||
with urlopen(req, timeout=self.ollama_timeout_s) as resp:
|
||
raw = resp.read().decode("utf-8", errors="ignore")
|
||
obj = json.loads(raw)
|
||
return (obj.get("message") or {}).get("content", "").strip() or "(模型未返回内容)"
|
||
except (HTTPError, URLError, TimeoutError) as e:
|
||
return f"(Ollama调用失败:{e})"
|
||
except Exception as e:
|
||
return f"(Ollama解析失败:{e})"
|
||
|
||
def _call_minimax_text(self, system_prompt: str, user_prompt: str) -> str:
|
||
"""调用 Minimax 文本模型 /v1/text/chatcompletion_v2。"""
|
||
if not self.minimax_api_key:
|
||
return "(Minimax API Key 未配置,请设置 MINIMAX_API_KEY 环境变量)"
|
||
|
||
payload: Dict[str, Any] = {
|
||
"model": self.minimax_text_model,
|
||
"messages": [
|
||
{"role": "system", "content": system_prompt},
|
||
{"role": "user", "content": user_prompt},
|
||
],
|
||
}
|
||
|
||
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
|
||
req = Request(
|
||
url=self.minimax_base_url,
|
||
data=data,
|
||
headers={
|
||
"Authorization": f"Bearer {self.minimax_api_key}",
|
||
"Content-Type": "application/json",
|
||
},
|
||
method="POST",
|
||
)
|
||
|
||
try:
|
||
with urlopen(req, timeout=self.minimax_timeout_s) as resp:
|
||
raw = resp.read().decode("utf-8", errors="ignore")
|
||
obj = json.loads(raw)
|
||
return (
|
||
obj.get("choices", [{}])[0]
|
||
.get("message", {})
|
||
.get("content", "")
|
||
.strip()
|
||
or "(模型未返回内容)"
|
||
)
|
||
except HTTPError as e:
|
||
body = e.read().decode("utf-8", errors="ignore")
|
||
print(f"[Minimax HTTP {e.code}] {body}")
|
||
return f"(Minimax调用失败 HTTP {e.code}:{e.reason})"
|
||
except (URLError, TimeoutError) as e:
|
||
return f"(Minimax调用失败:{e})"
|
||
except Exception as e:
|
||
return f"(Minimax解析失败:{e})"
|
||
|
||
def _call_minimax_vision(self, system_prompt: str, user_prompt: str, image_path: Path) -> str:
|
||
"""调用 Minimax 视觉模型(多模态),图片转为 base64 后通过 image_url 传入。"""
|
||
if not self.minimax_api_key:
|
||
return "(Minimax API Key 未配置,请设置 MINIMAX_API_KEY 环境变量)"
|
||
|
||
try:
|
||
img_bytes = image_path.read_bytes()
|
||
img_b64 = base64.b64encode(img_bytes).decode("utf-8")
|
||
except Exception as e:
|
||
return f"(读取图片失败:{e})"
|
||
|
||
payload: Dict[str, Any] = {
|
||
"model": self.minimax_vision_model,
|
||
"messages": [
|
||
{
|
||
"role": "user",
|
||
"content": [
|
||
{"type": "text", "text": user_prompt},
|
||
{
|
||
"type": "image_url",
|
||
"image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
|
||
},
|
||
],
|
||
}
|
||
],
|
||
}
|
||
|
||
if system_prompt:
|
||
payload["messages"].insert(
|
||
0,
|
||
{"role": "system", "content": system_prompt},
|
||
)
|
||
|
||
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
|
||
req = Request(
|
||
url=self.minimax_base_url,
|
||
data=data,
|
||
headers={
|
||
"Authorization": f"Bearer {self.minimax_api_key}",
|
||
"Content-Type": "application/json",
|
||
},
|
||
method="POST",
|
||
)
|
||
|
||
try:
|
||
with urlopen(req, timeout=self.minimax_timeout_s) as resp:
|
||
raw = resp.read().decode("utf-8", errors="ignore")
|
||
obj = json.loads(raw)
|
||
return (
|
||
obj.get("choices", [{}])[0]
|
||
.get("message", {})
|
||
.get("content", "")
|
||
.strip()
|
||
or "(模型未返回内容)"
|
||
)
|
||
except HTTPError as e:
|
||
body = e.read().decode("utf-8", errors="ignore")
|
||
print(f"[Minimax Vision HTTP {e.code}] {body}")
|
||
return f"(Minimax Vision调用失败 HTTP {e.code}:{e.reason})"
|
||
except (URLError, TimeoutError) as e:
|
||
return f"(Minimax Vision调用失败:{e})"
|
||
except Exception as e:
|
||
return f"(Minimax Vision解析失败:{e})"
|
||
|
||
def _ai_chat(
|
||
self,
|
||
model: str,
|
||
system_prompt: str,
|
||
user_prompt: str,
|
||
image_path: Optional[Path] = None,
|
||
) -> str:
|
||
"""
|
||
统一 AI 调用入口。根据 self.ai_provider 路由到不同后端实现。
|
||
model 参数在 ollama 模式下直接使用;在 minimax 模式下忽略(使用类级别配置的模型)。
|
||
"""
|
||
if self.ai_provider == "minimax":
|
||
if image_path is not None:
|
||
return self._call_minimax_vision(system_prompt, user_prompt, image_path)
|
||
else:
|
||
return self._call_minimax_text(system_prompt, user_prompt)
|
||
else:
|
||
return self._ollama_chat(model, system_prompt, user_prompt, image_path)
|
||
|
||
def _get_prompt_for_image(self, image_type: str, param: str, figure_num: int) -> Dict[str, str]:
|
||
"""按图片类型返回 system/user 提示词,带防幻觉约束。"""
|
||
system = (
|
||
"你是一位水质遥感与机器学习建模专家。\n"
|
||
"研究背景:我们利用高光谱影像数据,结合机器学习算法对研究区的水质参数进行了空间反演,并生成了以下图表。"
|
||
"现需要撰写自动化分析报告,请严格按照“图表类型→分析重点”的对应关系进行描述。\n\n"
|
||
"分析要求:\n"
|
||
"1. 请严格基于图片中可见信息进行分析,禁止编造不存在的数值、区域名称、采样时间或结论。\n"
|
||
"2. 如果图片无法支撑某项判断,必须明确写“根据本图无法判断”。\n"
|
||
"3. 不允许引用图片之外的背景知识来补全细节。"
|
||
)
|
||
|
||
# 为每种图表类型单独定义:分析要点 + 结论聚焦
|
||
type_specs = {
|
||
"histogram": {
|
||
"analysis": (
|
||
"分析要点:\n"
|
||
"- 分布形态:是左偏、右偏还是对称?是否存在多峰?\n"
|
||
"- 集中范围:数据主要集中在哪个区间?(参照横轴和纵轴柱高)\n"
|
||
"- 离群值:是否有明显孤立于主体分布的小柱,位于何处?\n"
|
||
"- 若图中包含拟合曲线,描述其形状(正态、指数等)。"
|
||
),
|
||
"conclusion": (
|
||
"结论应聚焦于:该参数的分布形态(如左偏/右偏/对称)、主要集中区间、是否存在极端离群值。"
|
||
"用一句话概括数据分布的核心特征,不推测成因。"
|
||
),
|
||
},
|
||
"spectrum_comparison": {
|
||
"analysis": (
|
||
"分析要点:\n"
|
||
"- 多条曲线的整体趋势是否一致?\n"
|
||
"- 在哪些波段(参照横轴波长位置)出现明显分离?\n"
|
||
"- 是否存在系统性的整体偏移(一条曲线全程高于另一条)?\n"
|
||
"- 图中是否有阴影或误差带表示置信区间?若有,描述其范围。"
|
||
),
|
||
"conclusion": (
|
||
"结论应聚焦于:各光谱曲线的整体一致性、关键差异波段、是否存在系统性偏移。"
|
||
"用一句话概括光谱对比的主要特征,不推测物理原因。"
|
||
),
|
||
},
|
||
"scatter_with_confidence": {
|
||
"analysis": (
|
||
"分析要点:\n"
|
||
"- 点云整体是否沿1:1线(对角线)分布?\n"
|
||
"- 点云在低值区/高值区是否存在系统性偏离(如整体偏上/偏下)?\n"
|
||
"- 置信带(若存在)覆盖了多少点?是否所有点都在置信带内?\n"
|
||
"- 是否有明显离群点(远离主体点云)?"
|
||
),
|
||
"conclusion": (
|
||
"结论应聚焦于:模型预测精度(点云与1:1线贴合程度)、偏差方向、置信带覆盖情况。"
|
||
"用一句话评价模型性能,不推测误差来源。"
|
||
),
|
||
},
|
||
"boxplot": {
|
||
"analysis": (
|
||
"分析要点:\n"
|
||
"- 中位数(箱体中间线)的位置。\n"
|
||
"- 四分位间距(箱体高度)反映的离散程度。\n"
|
||
"- 须(whisker)的长度,是否超出1.5倍IQR的离群点(用圆点/星号标示)。\n"
|
||
"- 若多个箱线图并排,比较各组的中心趋势和离散度。"
|
||
),
|
||
"conclusion": (
|
||
"结论应聚焦于:各组的中心趋势(中位数)、离散程度(四分位距)、是否存在离群点。"
|
||
"用一句话概括数据分布的统计特征,若有多组则简述对比。"
|
||
),
|
||
},
|
||
"distribution": {
|
||
"analysis": (
|
||
"分析要点:\n"
|
||
"- 高值区域:位于图中的哪个方位(如东北部、中部偏西、东南沿岸等)?呈现何种形状(斑块状、条带状、片状)?\n"
|
||
"- 低值区域:位置及形态。\n"
|
||
"- 梯度变化:是否存在明显的从某方位向另一方位递减或递增的趋势?\n"
|
||
"- 聚集特征:高值区是否成片聚集,还是零星散布?\n"
|
||
"注意:仅使用方位描述位置(如上、下、左、右、中心、边缘、沿岸等),禁止使用具体经纬度坐标或地名。"
|
||
),
|
||
"conclusion": (
|
||
"结论应聚焦于:高值区与低值区的空间方位、聚集形态、主要梯度方向。"
|
||
"用一句话概括空间分布格局,不推测污染源或成因。"
|
||
),
|
||
},
|
||
"correlation_heatmap": {
|
||
"analysis": (
|
||
"分析要点:\n"
|
||
"- 各变量对之间的相关性强度:颜色深浅对应的相关系数大小(参照图例)。\n"
|
||
"- 正相关与负相关:红色/蓝色分别代表正负(根据图例),描述主要的高正相关对和高负相关对。\n"
|
||
"- 若图中包含数值标注,可提及范围(如“大多数相关系数介于0.6~0.8”),但不得编造具体数字。\n"
|
||
"- 若单元格颜色过于接近难以区分,则写“根据本图无法判断具体相关性强弱”。"
|
||
),
|
||
"conclusion": (
|
||
"结论应聚焦于:变量间相关性的整体强弱水平、最主要的正负相关对。"
|
||
"用一句话概括相关性矩阵的核心特征,不推测因果关系。"
|
||
),
|
||
},
|
||
}
|
||
|
||
# 默认规格(如果类型未定义)
|
||
default_spec = {
|
||
"analysis": "重点:概括图中主要信息,列出可见的轴标签、图例、数据特征。",
|
||
"conclusion": "结论应基于可见信息,概括图中主要趋势或数据特征,不添加外部知识。",
|
||
}
|
||
|
||
spec = type_specs.get(image_type, default_spec)
|
||
analysis_part = spec["analysis"]
|
||
conclusion_part = spec["conclusion"]
|
||
|
||
common_output = (
|
||
"输出格式:\n"
|
||
"请结合坐标轴、图例、曲线、点云、颜色条等可见元素,描述数据特征(如分布形态、对比关系、空间位置等),引用图中具体元素但不编造数值。"
|
||
"随后用一句话总结该图揭示的主要趋势或数据质量。总结必须严格基于前文描述的可见信息,不得引入图中未呈现的外部知识、推测原因或隐含假设。"
|
||
"若信息不足以得出明确结论,则写“根据本图无法得出明确结论”。"
|
||
"要求:直接输出分析内容,不要使用“第一段”“第二段”等标记,两段之间不要留空行。")
|
||
|
||
user = (
|
||
f"图号:图{figure_num}\n"
|
||
f"参数:{param}\n"
|
||
f"图类型:{image_type}\n\n"
|
||
f"{analysis_part}\n\n"
|
||
f"{common_output}"
|
||
)
|
||
return {"system": system, "user": user}
|
||
|
||
|
||
|
||
def _style_figure_caption_simsun_xiaosi(self, paragraph):
    """Format a figure caption in the body font at 12 pt.

    Every run gets ``self.chinese_font`` as both its font name and its East
    Asian font, so Latin and CJK caption text render alike.
    """
    caption_font = self.chinese_font
    for piece in paragraph.runs:
        piece.font.name = caption_font
        piece.font.size = Pt(12)
        piece._element.get_or_add_rPr().rFonts.set(qn('w:eastAsia'), caption_font)
|
||
|
||
def _add_image_with_caption(self, doc: Document, image_path: str, caption: str, width=Inches(5.5)):
    """Insert a centred picture followed by its caption, kept on one page.

    Args:
        doc: Target Word document.
        image_path: Path of the image file.
        caption: Caption text, e.g. "图3-1 航线规划".
        width: Rendered picture width.

    Returns:
        True when picture and caption were added. On any error a bracketed
        error paragraph is appended instead and False is returned.
    """
    try:
        # Picture paragraph: centred and glued to the caption that follows.
        picture_para = doc.add_paragraph()
        picture_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        picture_fmt = picture_para.paragraph_format
        picture_fmt.keep_with_next = True
        picture_fmt.keep_together = True
        picture_fmt.space_after = Pt(6)
        picture_para.add_run().add_picture(str(image_path), width=width)

        # Caption paragraph in the built-in 'Caption' style, restyled to the body font.
        caption_para = doc.add_paragraph(caption, style='Caption')
        caption_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        self._style_figure_caption_simsun_xiaosi(caption_para)
        caption_fmt = caption_para.paragraph_format
        caption_fmt.keep_with_next = False
        caption_fmt.keep_together = True
        caption_fmt.space_before = Pt(0)
        caption_fmt.space_after = Pt(12)
    except Exception as e:
        doc.add_paragraph(f"[无法插入图像: {e}]")
        return False
    return True
|
||
|
||
def _add_ai_analysis_paragraph(self, doc: Document, analysis_text: str):
    """Append an AI analysis paragraph (placed after a figure).

    The text is cleaned first: "第一段:" / "第二段:" markers are removed
    (full-width or ASCII colon) and every run of whitespace, including line
    breaks, is collapsed to a single space. The cleaned text is what gets
    inserted; the cleanup used to be computed and then discarded in favour
    of the raw text.

    Args:
        doc: Target Word document.
        analysis_text: Raw analysis text returned by the AI backend.
    """
    import re

    cleaned_text = re.sub(r"第[一二]段\s*[::]", "", (analysis_text or "").strip())
    cleaned_text = re.sub(r"\s+", " ", cleaned_text).strip()

    p = doc.add_paragraph()
    p.paragraph_format.first_line_indent = Pt(24)
    p.paragraph_format.line_spacing = 1.5
    p.paragraph_format.space_after = Pt(12)

    # A single body run carries the text; the former empty bold run added no
    # visible content and has been dropped.
    body_run = p.add_run(cleaned_text)
    body_run.font.name = self.chinese_font
    body_run.font.size = Pt(12)
    body_run._element.rPr.rFonts.set(qn('w:eastAsia'), self.chinese_font)
|
||
|
||
def _analyze_and_cache_image(self, image_path: Path, image_type: str, param: str, figure_num: int) -> str:
|
||
"""分析单张图片并缓存,失败则返回可展示的提示文本。"""
|
||
if not self.enable_ai_analysis:
|
||
return "(AI分析已关闭)"
|
||
if not image_path.exists():
|
||
return "(图片不存在,无法分析)"
|
||
|
||
cache = self._load_ai_cache()
|
||
cache_key = f"{image_path.name}::{image_path.stat().st_mtime_ns}::{self.ollama_vision_model}::{image_type}"
|
||
if cache_key in cache:
|
||
return str(cache[cache_key])
|
||
|
||
prompts = self._get_prompt_for_image(image_type=image_type, param=param, figure_num=figure_num)
|
||
text = self._ai_chat(
|
||
model=self.ollama_vision_model,
|
||
system_prompt=prompts["system"],
|
||
user_prompt=prompts["user"],
|
||
image_path=image_path,
|
||
)
|
||
cache[cache_key] = text
|
||
self._save_ai_cache(cache)
|
||
return text
|
||
|
||
def _create_progress(self, total: int, desc: str = "进度"):
    """Return a progress bar: tqdm when it can be used, else the built-in one.

    Any failure while importing or constructing tqdm falls back to
    ``_SimpleProgress``.
    """
    try:
        from tqdm import tqdm  # type: ignore
        bar = tqdm(total=total, desc=desc, unit="步", ncols=90)
    except Exception:
        bar = _SimpleProgress(total=total, desc=desc)
    return bar
|
||
|
||
def _analyze_statistics(self, stats_data: List[Dict[str, Any]], param_names: List[str]) -> str:
|
||
"""对水质参数统计数据进行 AI 分析"""
|
||
if not self.enable_ai_analysis:
|
||
return "(AI分析已关闭)"
|
||
|
||
# 构造统计数据文本
|
||
stats_text = "水质参数统计摘要:\n"
|
||
for stat in stats_data:
|
||
stats_text += f"- {stat['参数']}: 点位数={stat['点位数']}, 范围=[{stat['最小值']}, {stat['最大值']}], 均值={stat['平均值']}, 标准差={stat['标准差']}\n"
|
||
|
||
system = """你是一位水质遥感与统计分析专家。
|
||
请基于提供的统计数据,给出专业分析:
|
||
1. 识别哪些参数变异程度较高(标准差大)
|
||
2. 识别哪些参数数值范围异常
|
||
3. 评估数据质量和分布特征
|
||
4. 禁止编造数据外的信息"""
|
||
|
||
user = f"""以下是水质参数的统计数据,请给出100-200字的专业分析:
|
||
{stats_text}
|
||
|
||
输出格式:数据特征分析(变异程度、数值范围等)结论与数据质量评估"""
|
||
|
||
return self._ai_chat(self.ollama_text_model, system, user, image_path=None)
|
||
|
||
|
||
def generate_report(self,
                    work_dir: str = None,
                    parameters: List[str] = None,
                    report_title: str = "水质参数反演分析报告",
                    output_path: Optional[str] = None) -> str:
    """Generate the Word report from the contents of the working directory.

    Figures are read from ``<work_dir>/14_visualization``. The document is
    assembled in this order: cover and introductory pages, result analysis,
    physical-inversion section, one chapter per parameter (numbered from 5),
    and a final AI-written summary chapter.

    Args:
        work_dir: Optional new working directory. When given, the
            visualization directory is re-derived from it; the output
            directory and cache path follow only if the output directory
            was defaulted at construction time.
        parameters: Parameter names to include; defaults to the 13 built-in
            parameters.
        report_title: Accepted for compatibility; not referenced in this method.
        output_path: Target ``.docx`` path. Defaults to a timestamped file in
            the output directory. Its parent directory is created up front so
            that a missing folder cannot make the final save fail after all
            analysis work has been done.

    Returns:
        The path of the saved report as a string.

    Raises:
        FileNotFoundError: If the visualization directory does not exist.
    """
    # Re-point the working directory (the root of the whole pipeline).
    if work_dir is not None:
        self.work_dir = Path(work_dir)
        self.visualization_dir = self.work_dir / "14_visualization"
        if getattr(self, "_output_dir_is_default", False):
            self.output_dir = self.visualization_dir
            self.output_dir.mkdir(parents=True, exist_ok=True)
            self.ai_cache_path = self.output_dir / "ollama_image_analyses_cache.json"

    if parameters is None:
        parameters = ["Chlorophyll", "COD", "DO", "PH", "Temperature",
                      "spCond", "Turbidity", "TDS", "Cl-", "NO3-N",
                      "NH3-N", "BGA", "TT"]

    vis_dir = self.visualization_dir

    if not vis_dir.exists():
        raise FileNotFoundError(f"可视化目录不存在: {vis_dir}")

    if output_path is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = self.output_dir / f"水质参数反演分析报告_{timestamp}.docx"
    else:
        output_path = Path(output_path)
    # Fail early (or create the folder) rather than at doc.save() after all AI calls.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Progress is counted per image plus two fixed steps.
    total_images = sum(len(self.parameter_images.get(p, [])) for p in parameters)
    total_steps = total_images + 1 + 1  # +1 correlation heatmap (one attempt), +1 overall summary
    progress = self._create_progress(total=total_steps, desc="生成Word报告")

    # Create the document.
    doc = Document()

    # Page setup: A4 with 2.5 cm margins.
    section = doc.sections[0]
    section.page_width = Cm(21)
    section.page_height = Cm(29.7)
    section.left_margin = Cm(2.5)
    section.right_margin = Cm(2.5)
    section.top_margin = Cm(2.5)
    section.bottom_margin = Cm(2.5)

    try:
        # Front matter.
        self._add_cover_page(doc)
        self._add_company_description_page(doc)
        self._add_data_acquisition_section(doc)
        self._add_data_processing_section(doc)

        # Per-figure analyses collected across the whole document for the final summary.
        all_image_analyses: List[Dict[str, Any]] = []

        # Result analysis (including the heatmap); returns the updated figure counter.
        figure_counter = 1
        figure_counter = self._add_result_analysis_section(
            doc, vis_dir, figure_counter, all_image_analyses, progress=progress
        )

        # Physical-model inversion statistics and analysis (section 4.1).
        figure_counter = self._add_physical_inversion_section(
            doc, self.work_dir, figure_counter, all_image_analyses, progress=progress
        )

        # Header and page numbers (starting from the body).
        self._setup_header_and_footer(section)

        # One numbered chapter per parameter, starting at chapter 5.
        base_section_num = 5
        last_param_section_num = base_section_num + len(parameters) - 1
        for section_num, param in enumerate(parameters, base_section_num):
            figure_counter = self._add_parameter_section(
                doc,
                param,
                vis_dir,
                section_num,
                figure_counter,
                all_image_analyses,
                progress=progress,
            )
            # No break after the last chapter: the summary adds its own below.
            if section_num != last_param_section_num:
                doc.add_page_break()

        # Overall summary, placed after all figures and parameters.
        doc.add_page_break()
        summary_section_num = base_section_num + len(parameters)
        summary_heading = doc.add_heading(f"{summary_section_num} 综合分析总结", level=1)
        self._style_heading(summary_heading, level=1)

        if self.enable_ai_analysis and all_image_analyses:
            analyses_text = "\n\n".join(
                [
                    f"图{a.get('figure_num')}({a.get('param')} / {a.get('image_type')} / {a.get('image_name')})\n{a.get('analysis')}"
                    for a in all_image_analyses
                ]
            )
            system = (
                "你是一位水质遥感与报告撰写专家。"
                "只能基于提供的“逐图分析文本”做总结,禁止引入任何外部事实或猜测。"
                "若信息不足,必须明确说明“根据现有分析无法判断”。"
            )
            user = (
                "以下是逐图分析文本,请给出报告级别的综合总结,要求:\n"
                "- 150~300字中文\n"
                "- 结构:总体概况 / 主要异常或热点 / 参数间关系(如有)/ 建议关注点\n"
                "- 不要编造具体数值、地名、日期\n\n"
                f"{analyses_text}"
            )
            summary_text = self._ai_chat(self.ollama_text_model, system, user, image_path=None)
            para = doc.add_paragraph(summary_text)
            para.paragraph_format.first_line_indent = Pt(24)
            para.paragraph_format.line_spacing = 1.5
            for run in para.runs:
                run.font.name = self.chinese_font
                run.font.size = Pt(12)
                run._element.rPr.rFonts.set(qn('w:eastAsia'), self.chinese_font)
        else:
            doc.add_paragraph("(未启用AI分析或无可用分析文本,无法生成综合总结。)")

        # Summary done: advance progress by one. The bar is cosmetic, so a
        # failure to update it must never abort report generation.
        try:
            progress.update(1)
        except Exception:
            pass
    finally:
        # Always release the progress bar, even when assembly failed.
        try:
            progress.close()
        except Exception:
            pass

    # Save the document.
    doc.save(str(output_path))
    print(f"✅ Word报告生成完成: {output_path}")

    return str(output_path)
|
||
|
||
def _add_parameter_section(
|
||
self,
|
||
doc,
|
||
param: str,
|
||
vis_dir: Path,
|
||
param_index: int = 1,
|
||
start_figure_num: int = 1,
|
||
all_image_analyses: Optional[List[Dict[str, Any]]] = None,
|
||
progress=None,
|
||
):
|
||
"""为单个参数添加报告章节(带编号和规范中英文图题)"""
|
||
if param not in self.parameter_descriptions:
|
||
print(f"警告: 参数 {param} 没有预定义的描述")
|
||
return start_figure_num
|
||
|
||
# 添加带编号的参数标题
|
||
heading = doc.add_heading(f"{param_index}. {param} 参数分析", level=1)
|
||
self._style_heading(heading, level=1)
|
||
|
||
# 添加参数描述
|
||
desc_para = doc.add_paragraph(self.parameter_descriptions[param])
|
||
desc_para.paragraph_format.space_after = Pt(12)
|
||
|
||
# 设置首行缩进两个字符(中文排版规范)
|
||
desc_para.paragraph_format.first_line_indent = Pt(24)
|
||
|
||
# 设置正文样式:宋体小四,1.5倍行距
|
||
desc_para.paragraph_format.line_spacing = 1.5
|
||
for run in desc_para.runs:
|
||
run.font.name = 'SimSun'
|
||
run.font.size = Pt(12)
|
||
run._element.rPr.rFonts.set(qn('w:eastAsia'), 'SimSun')
|
||
|
||
# 添加图片 - 支持子文件夹结构 + 中英文图题
|
||
if param in self.parameter_images:
|
||
image_list = self.parameter_images[param]
|
||
title_map = {
|
||
"histogram": "直方图",
|
||
"spectrum_comparison": "光谱对比图",
|
||
"scatter_with_confidence": "模型散点图",
|
||
"boxplot": "箱型图",
|
||
"distribution": "分布图"
|
||
}
|
||
|
||
for i, img_name in enumerate(image_list):
|
||
figure_num = start_figure_num + i
|
||
|
||
# 选择子文件夹
|
||
if "boxplot" in img_name.lower():
|
||
sub_dir = vis_dir / "boxplots"
|
||
title_key = "boxplot"
|
||
elif "scatter" in img_name.lower() or "confidence" in img_name.lower():
|
||
sub_dir = vis_dir / "scatter_plots"
|
||
title_key = "scatter_with_confidence"
|
||
elif "histogram" in img_name.lower():
|
||
sub_dir = vis_dir
|
||
title_key = "histogram"
|
||
elif "spectrum" in img_name.lower():
|
||
sub_dir = vis_dir
|
||
title_key = "spectrum_comparison"
|
||
elif "distribution" in img_name.lower():
|
||
sub_dir = vis_dir
|
||
title_key = "distribution"
|
||
else:
|
||
sub_dir = vis_dir
|
||
title_key = "histogram"
|
||
|
||
img_path = sub_dir / img_name
|
||
if not img_path.exists():
|
||
img_path = vis_dir / img_name
|
||
|
||
if img_path.exists():
|
||
param_cn = param.replace("Chlorophyll", "叶绿素").replace("NO3-N", "硝酸盐氮").replace("NH3-N", "氨氮")
|
||
cn_title = title_map.get(title_key, "分析图")
|
||
|
||
# 使用统一的图像插入方法(中文图题)
|
||
caption_text = f"图{figure_num} {param_cn}{cn_title}"
|
||
self._add_image_with_caption(doc, str(img_path), caption_text, width=Inches(6.0))
|
||
|
||
# 添加英文图题:宋体小四(与中文图题一致)
|
||
en_title = title_key.replace('_', ' ').title()
|
||
caption_en = doc.add_paragraph(f"Figure {figure_num} {param} {en_title}")
|
||
caption_en.style = 'Caption'
|
||
caption_en.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||
caption_en.paragraph_format.space_after = Pt(8)
|
||
self._style_figure_caption_simsun_xiaosi(caption_en)
|
||
for run in caption_en.runs:
|
||
run.font.color.rgb = RGBColor(0, 0, 0)
|
||
|
||
# AI 分析:插入在图题之后
|
||
analysis_text = self._analyze_and_cache_image(
|
||
image_path=img_path,
|
||
image_type=title_key,
|
||
param=param,
|
||
figure_num=figure_num,
|
||
)
|
||
self._add_ai_analysis_paragraph(doc, analysis_text)
|
||
if all_image_analyses is not None:
|
||
all_image_analyses.append(
|
||
{
|
||
"figure_num": figure_num,
|
||
"param": param,
|
||
"image_type": title_key,
|
||
"image_name": img_name,
|
||
"analysis": analysis_text,
|
||
}
|
||
)
|
||
else:
|
||
error_para = doc.add_paragraph(f"[图片未找到: {img_name} (已搜索根目录和对应子文件夹)]")
|
||
error_para.paragraph_format.first_line_indent = Pt(0)
|
||
for run in error_para.runs:
|
||
run.font.name = self.chinese_font
|
||
if hasattr(run._element, 'rPr'):
|
||
run._element.rPr.rFonts.set(qn('w:eastAsia'), self.chinese_font)
|
||
|
||
# 每处理完一张图(无论成功/失败)更新进度条
|
||
try:
|
||
if progress is not None:
|
||
progress.update(1)
|
||
except Exception:
|
||
pass
|
||
|
||
doc.add_paragraph() # 章节结束空行
|
||
return start_figure_num + len(image_list)
|
||
|
||
def _add_cover_page(self, doc):
    """Build the report cover page.

    Adds, in order: the logo (left aligned), spacer paragraphs, the main
    title, more spacers, the company name, today's date and the bottom
    cover image. An image that is missing or fails to load is skipped so
    the rest of the report can still be generated.

    Args:
        doc: The python-docx document being assembled.
    """
    section = doc.sections[-1]
    section.different_first_page_header_footer = True

    # 1. Logo at the top-left, resolved through get_resource_path.
    cover_top_img_path = get_resource_path("data/icons/word/lica.png")
    if os.path.isfile(cover_top_img_path):
        try:
            logo_para = doc.add_paragraph()
            logo_para.alignment = WD_ALIGN_PARAGRAPH.LEFT
            logo_para.add_run().add_picture(str(cover_top_img_path), width=Inches(3.2))
        except Exception as e:
            # Best effort: a broken logo must not abort report generation.
            print(f"封面顶部图片加载失败: {e}")

    # Vertical space between the logo and the title.
    for _ in range(6):
        doc.add_paragraph()

    # 2. Main title.
    title = doc.add_heading("无人机高光谱水质参数分析报告", level=0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    for run in title.runs:
        run.font.name = self.title_font
        run.font.size = Pt(36)
        # Also set the w:eastAsia font attribute on the run properties.
        run._element.rPr.rFonts.set(qn('w:eastAsia'), self.title_font)

    # 3. Company name and date, pushed down by spacer paragraphs
    #    (one small gap plus six more, as before).
    doc.add_paragraph()
    for _ in range(6):
        doc.add_paragraph()

    company = doc.add_paragraph("北京理加联合科技有限公司")
    company.alignment = WD_ALIGN_PARAGRAPH.CENTER
    for run in company.runs:
        run.font.name = self.chinese_font
        run.font.size = Pt(18)
        run.font.bold = True
        run._element.rPr.rFonts.set(qn('w:eastAsia'), self.chinese_font)

    # The date goes directly below the company name.
    date_str = datetime.now().strftime("%Y年%m月%d日")
    date_para = doc.add_paragraph(date_str)
    date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    for run in date_para.runs:
        run.font.name = self.chinese_font
        run.font.size = Pt(14)
        run.font.bold = True
        run._element.rPr.rFonts.set(qn('w:eastAsia'), self.chinese_font)

    # 4. Bottom cover image.
    cover_bottom_img_path = get_resource_path("data/icons/word/fenmian.png")
    if os.path.isfile(cover_bottom_img_path):
        try:
            banner_para = doc.add_paragraph()
            banner_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
            banner_para.add_run().add_picture(str(cover_bottom_img_path), width=Inches(5.8))
        except Exception as e:
            # Best effort, same as the logo above.
            print(f"封面底部图片加载失败: {e}")

def _add_company_description_page(self, doc):
    """Add the "1 公司简介" chapter: company profile followed by contact details.

    Profile paragraphs get a 24 pt first-line indent; contact lines are
    written without an indent. Both use SimSun 12 pt with 1.5 line spacing.
    The chapter ends with a page break.

    Args:
        doc: The python-docx document being assembled.
    """
    def _apply_body_font(paragraph):
        """Set 1.5 line spacing and SimSun 12 pt on every run of *paragraph*."""
        paragraph.paragraph_format.line_spacing = 1.5
        for run in paragraph.runs:
            run.font.name = 'SimSun'
            run.font.size = Pt(12)
            run._element.rPr.rFonts.set(qn('w:eastAsia'), 'SimSun')

    h = doc.add_heading("1 公司简介", level=1)
    self._style_heading(h, level=1)

    # One string per line; each non-empty line becomes its own paragraph.
    company_text = "\n".join((
        "北京理加联合科技有限公司成立于2005年,总部位于北京光华创业园,在深圳、西安设有办事处。公司专注于生态环境仪器的自主研发与技术服务,致力于为国内用户提供全球领先的稳定性同位素、痕量气体、高光谱成像、环境空气质量及大气颗粒物监测等测量设备。",
        "作为英国ASD、美国Resonon、美国Campbell、法国AMS等多家国际知名品牌的中国区代理商与技术服务中心,理加联合同时拥有一支经验丰富的研发团队,已获得20余项实用新型专利。自主研发产品包括LI-2100全自动真空冷凝抽提系统、SF-3500系列土壤气体通量自动测量系统、PS-9000便携式土壤碳通量自动测量系统等,广泛应用于生态、环境、农业等领域。",
        "公司设有ASD和Resonon产品的定标实验室,显著提升定标效率、降低用户成本。2018年通过ISO9001质量管理体系认证,售后服务团队定期赴原厂培训。理加联合已参与“211”工程、“985”工程及中国生态系统研究网络(CERN)等重大科研项目,以专业技术与完善售后赢得广泛市场认可。",
    ))

    company_paragraphs = [p.strip() for p in company_text.split('\n') if p.strip()]
    for para_text in company_paragraphs:
        para = doc.add_paragraph(para_text)
        para.paragraph_format.first_line_indent = Pt(24)  # roughly two characters
        _apply_body_font(para)

    # Spacer paragraphs between the profile and the contact block.
    for _ in range(5):
        doc.add_paragraph()

    contact_info = "\n".join((
        "地址:北京市海淀区安宁庄东路18号光华创业园5号楼(生产研发)光华创业园科研楼四层",
        "电话:13910499761 13910124070 010-51292601",
        "传真:010-82899770-8014",
        "邮箱:info@li-ca.com",
        "邮编:100085",
    ))

    contact_lines = [line.strip() for line in contact_info.split('\n') if line.strip()]
    for line in contact_lines:
        _apply_body_font(doc.add_paragraph(line))

    doc.add_page_break()

def _add_data_acquisition_section(self, doc):
    """Add the "2 数据获取" chapter: equipment photo, its title, and the template text.

    The descriptive text contains ``____`` placeholders that are written to
    the document as-is. The chapter ends with a page break.

    Args:
        doc: The python-docx document being assembled.
    """
    h = doc.add_heading("2 数据获取", level=1)
    self._style_heading(h, level=1)

    # Equipment photo with its title underneath; both are skipped when the
    # bundled image cannot be found.
    img1_path = get_resource_path("data/icons/word/屏幕截图 2026-03-31 144131.png")
    if os.path.isfile(img1_path):
        picture_para = doc.add_paragraph()
        picture_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        picture_para.add_run().add_picture(str(img1_path), width=Inches(6.0))

        title1 = doc.add_paragraph("大疆M400无人机及300TC高光谱相机")
        title1.alignment = WD_ALIGN_PARAGRAPH.CENTER
        for run in title1.runs:
            run.font.name = self.title_font
            run.font.size = Pt(14)
            run.font.bold = True
            run._element.rPr.rFonts.set(qn('w:eastAsia'), self.title_font)

    doc.add_paragraph()  # blank line between the picture and the text

    # Acquisition description (template with blanks to be filled in by hand).
    data_text = "本次研究采用大疆M400无人机搭载高光谱成像仪进行数据获取。飞行区域覆盖研究区全部水域及周边参照地,共执行飞行任务____架次,总飞行时间约为____小时,实际有效覆盖面积约____平方公里。飞行前进行航线规划,设置航向重叠率____%、旁向重叠率____%,飞行高度为____米,地面分辨率达到____米。为确保数据质量,选择天气晴朗、风速小于____级、太阳高度角适宜的气象窗口期进行作业,并在水体周边布设____个地面控制点及____个光谱定标参考板。整个数据获取过程严格按照无人机操作规范执行,获取的高光谱原始数据存储于机载固态硬盘,后续用于几何校正、辐射定标等预处理步骤。"

    para = doc.add_paragraph(data_text)
    para.paragraph_format.first_line_indent = Pt(24)
    para.paragraph_format.space_after = Pt(12)
    para.paragraph_format.line_spacing = 1.5

    # Body font: SimSun 12 pt.
    for run in para.runs:
        run.font.name = 'SimSun'
        run.font.size = Pt(12)
        run._element.rPr.rFonts.set(qn('w:eastAsia'), 'SimSun')

    doc.add_page_break()

def _add_data_processing_section(self, doc):
    """Add the "3 数据处理流程" chapter.

    Inserts the workflow diagram with its caption (or a placeholder line when
    the image is missing), the workflow description, and then the 3.1
    sub-section produced by ``_add_hyperspectral_images_section``. The
    chapter ends with a page break.

    Args:
        doc: The python-docx document being assembled.
    """
    h = doc.add_heading("3 数据处理流程", level=1)
    self._style_heading(h, level=1)

    # Workflow diagram plus caption, or a placeholder when the file is absent.
    processing_img_path = get_resource_path("data/icons/word/liucheng.png")
    if os.path.isfile(processing_img_path):
        picture_para = doc.add_paragraph()
        picture_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        picture_para.add_run().add_picture(str(processing_img_path), width=Inches(6.5))

        # Caption directly below the picture.
        caption_p = doc.add_paragraph()
        caption_p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        caption_run = caption_p.add_run("图3-1 水质高光谱反演数据处理流程图")
        caption_run.font.name = 'SimSun'
        caption_run.font.size = Pt(11)
        caption_run._element.rPr.rFonts.set(qn('w:eastAsia'), 'SimSun')
        caption_run.font.bold = False
        caption_p.paragraph_format.space_before = Pt(6)
        caption_p.paragraph_format.space_after = Pt(12)
    else:
        doc.add_paragraph("[数据处理流程图片占位]")

    doc.add_paragraph()  # blank line between the picture and the text

    # Workflow description: two lines joined by a newline in ONE paragraph.
    processing_text = (
        "采用基于高光谱遥感的水质反演流程来获取水体参数的空间分布。首先通过无人机或卫星平台获取研究区的高光谱影像,随后进行一系列预处理:几何校正使影像与真实地理坐标匹配,辐射校正将原始数值转换为表观辐亮度,大气校正则去除大气分子与气溶胶的影响以获取真实地表反射率;对于多航带数据还需进行航带自动拼接。针对水面特有的镜面反射,我们执行耀斑识别及去除,并利用BRDF校正消除观测角度变化带来的二向性反射差异。\n"
        "之后采用归一化水体指数或深度学习方法自动分割出纯水域像元,排除陆地与植被干扰。在光谱分析阶段,从预处理后的高光谱数据中提取对叶绿素a、悬浮物、透明度等水质参数敏感的波段、比值或吸收深度等光谱特征,并基于地面同步实测数据构建机器学习模型(如随机森林、支持向量机或偏最小二乘回归)。最终将训练好的模型应用于整景影像,逐像元反演出水质参数浓度,并生成专题图与统计报告,实现从原始高光谱数据到水质空间分布信息的完整技术链。"
    )

    para = doc.add_paragraph(processing_text)
    para.paragraph_format.first_line_indent = Pt(24)
    para.paragraph_format.space_after = Pt(12)
    para.paragraph_format.line_spacing = 1.5

    # Body font: SimSun 12 pt.
    for run in para.runs:
        run.font.name = 'SimSun'
        run.font.size = Pt(12)
        run._element.rPr.rFonts.set(qn('w:eastAsia'), 'SimSun')

    # Hyperspectral image, glint and de-glint previews.
    self._add_hyperspectral_images_section(doc)

    doc.add_page_break()

def _add_hyperspectral_images_section(self, doc):
    """Add sub-section 3.1 with the image-processing previews.

    In order: flight-route map, raw hyperspectral preview, water-mask
    overlay, glint preview, de-glinted preview, an AI note on the glint
    distribution, and the sampling-point map. Every missing image is
    replaced by a bracketed placeholder line.

    Args:
        doc: The python-docx document being assembled.
    """
    def _sub_heading(text):
        """Add a styled level-3 heading."""
        heading = doc.add_heading(text, level=3)
        self._style_heading(heading, level=3)

    def _latest_image(folder):
        """Return the most recently modified PNG/JPG in *folder*, or None."""
        if not folder.exists():
            return None
        candidates = list(folder.glob("*.png")) + list(folder.glob("*.jpg"))
        if not candidates:
            return None
        return max(candidates, key=lambda p: p.stat().st_mtime)

    h = doc.add_heading("3.1 高光谱图像处理过程", level=2)
    self._style_heading(h, level=2)

    work_dir_path = self.work_dir
    vis_dir = self.visualization_dir

    # NOTE(review): caption "图3-1" below is also used by the workflow diagram
    # in _add_data_processing_section; renumber if both must be unique.

    # 0. Flight-route map (newest file in the flight_maps folder).
    _sub_heading("航线规划:")
    latest_flight_map = _latest_image(work_dir_path / "14_visualization" / "flight_maps")
    if latest_flight_map is not None:
        success = self._add_image_with_caption(doc, str(latest_flight_map), "图3-1 航线规划", width=Inches(5.5))
        if success:
            flight_analysis = self._analyze_flight_path_image(str(latest_flight_map))
            self._add_ai_analysis_paragraph(doc, flight_analysis)
    else:
        doc.add_paragraph("[航线规划图 - 文件未找到]")

    # 1. Raw hyperspectral preview.
    hyperspectral_img_path = work_dir_path / "1_water_mask" / "hsi_preview.png"
    _sub_heading("高光谱原始影像:")
    if hyperspectral_img_path.exists():
        self._add_image_with_caption(doc, str(hyperspectral_img_path), "图3-2 高光谱原始影像", width=Inches(5.5))
    else:
        doc.add_paragraph("[高光谱原始影像 - 文件未找到]")

    # 2. Water-mask overlay.
    water_mask_overlay_path = work_dir_path / "1_water_mask" / "water_mask_overlay.png"
    _sub_heading("水体区域识别:")
    if water_mask_overlay_path.exists():
        success = self._add_image_with_caption(
            doc,
            str(water_mask_overlay_path),
            "图3-3 水体区域识别(蓝色半透明区域为水域)",
            width=Inches(5.5),
        )
        if success:
            water_analysis = self._analyze_water_mask_overlay(str(water_mask_overlay_path))
            self._add_ai_analysis_paragraph(doc, water_analysis)
    else:
        doc.add_paragraph("[水体区域识别图 - 文件未找到]")

    doc.add_paragraph()

    # 3. Glint preview.
    glint_img_path = vis_dir / "glint_deglint_previews" / "glint_severe_glint_area_preview.png"
    _sub_heading("耀斑区域识别结果:")
    if not glint_img_path.exists():
        # Fall back to any other glint preview. "*glint*" also matches
        # "deglint_*" files, so those are filtered out; sorting makes the
        # pick deterministic.
        glint_files = sorted(
            p for p in vis_dir.glob("glint_deglint_previews/*glint*.png")
            if "deglint" not in p.name.lower()
        )
        if glint_files:
            glint_img_path = glint_files[0]
    if glint_img_path.exists():
        self._add_image_with_caption(doc, str(glint_img_path), "图3-4 耀斑区域识别结果", width=Inches(5.5))
    else:
        doc.add_paragraph("[耀斑区域识别结果 - 文件未找到]")

    doc.add_paragraph()

    # 4. De-glinted preview.
    deglint_img_path = vis_dir / "glint_deglint_previews" / "deglint_deglint_goodman_preview.png"
    _sub_heading("去除耀斑后的影像:")
    if deglint_img_path.exists():
        self._add_image_with_caption(doc, str(deglint_img_path), "图3-5 去除耀斑后的高光谱影像", width=Inches(5.5))
    else:
        # Fall back to any other de-glint preview (sorted for a stable pick).
        deglint_files = sorted(vis_dir.glob("glint_deglint_previews/*deglint*.png"))
        if deglint_files:
            deglint_img_path = deglint_files[0]
            self._add_image_with_caption(doc, str(deglint_img_path), "图3-5 去除耀斑后的影像", width=Inches(5.5))
        else:
            doc.add_paragraph("[去除耀斑后的影像 - 文件未找到]")

    doc.add_paragraph()

    # 5. AI note on the glint distribution. Only paths that exist are passed on.
    glint_analysis = self._analyze_glint_distribution_with_ai(
        str(glint_img_path) if glint_img_path.exists() else None,
        str(hyperspectral_img_path) if hyperspectral_img_path.exists() else None,
    )
    self._add_ai_analysis_paragraph(doc, glint_analysis)

    # 6. Sampling-point map (newest file in the sampling_maps folder).
    _sub_heading("采样点分布:")
    latest_sampling_map = _latest_image(vis_dir / "sampling_maps")
    if latest_sampling_map is not None:
        success = self._add_image_with_caption(doc, str(latest_sampling_map), "图3-6 采样点分布图", width=Inches(5.5))
        if success:
            sampling_analysis = self._analyze_sampling_distribution(str(latest_sampling_map))
            self._add_ai_analysis_paragraph(doc, sampling_analysis)
    else:
        doc.add_paragraph("[采样点分布图 - 文件未找到]")

def _analyze_glint_distribution_with_ai(self, glint_img_path: str = None, original_img_path: str = None) -> str:
|
||
"""使用AI分析耀斑的位置分布"""
|
||
if not self.enable_ai_analysis:
|
||
return "AI分析已禁用。耀斑主要分布在水体表面强反射区域,通常出现在太阳光直射角度较大的位置。"
|
||
|
||
try:
|
||
analysis_prompt = """请分析这张高光谱影像中的耀斑分布情况。
|
||
请从以下几个方面进行专业分析:
|
||
1. 耀斑的主要分布位置(水体中心、边缘、特定方位等)
|
||
2. 耀斑面积占比估计
|
||
3. 耀斑分布特征(集中分布还是分散分布)
|
||
4. 可能的成因分析
|
||
5. 对水质参数反演的影响评估
|
||
|
||
请用专业且简洁的语言描述,控制在150字以内。"""
|
||
|
||
if glint_img_path and Path(glint_img_path).exists():
|
||
return self._ai_chat(self.ollama_vision_model, "你是一个专业的水质遥感分析专家。", analysis_prompt, Path(glint_img_path))
|
||
elif original_img_path and Path(original_img_path).exists():
|
||
return self._ai_chat(self.ollama_vision_model, "你是一个专业的水质遥感分析专家。", analysis_prompt, Path(original_img_path))
|
||
else:
|
||
return "基于影像分析,耀斑主要分布在水体表面强反射区域,对水质参数反演有一定影响,建议在数据处理时重点关注这些区域。"
|
||
|
||
except Exception as e:
|
||
return f"AI分析失败: {str(e)}。耀斑主要分布在水体表面强反射区域,通常与太阳入射角和水面粗糙度有关。"
|
||
|
||
def _analyze_flight_path_image(self, flight_img_path: str) -> str:
|
||
"""
|
||
使用AI分析航线规划图
|
||
|
||
分析内容:
|
||
1. 架次数量
|
||
2. 每个架次的飞行方向
|
||
3. 图例中的飞行起始结束时间
|
||
"""
|
||
if not self.enable_ai_analysis:
|
||
return "AI分析已禁用。根据航线规划图,可识别多个架次的飞行轨迹,每个架次具有不同的飞行方向和时间安排。"
|
||
|
||
try:
|
||
if not Path(flight_img_path).exists():
|
||
return "航线图文件不存在,无法进行分析。"
|
||
|
||
analysis_prompt = """请详细分析这张航线规划图,并严格按照以下要求输出:
|
||
|
||
分析要求:
|
||
1. 架次数量:明确指出图中有几个架次(几条不同颜色的轨迹线)
|
||
2. 飞行方向:描述每个架次的大致飞行方向(如:东西向、南北向、东北-西南向等)
|
||
3. 时间信息:从图例中提取每个架次的起始和结束时间
|
||
|
||
输出格式要求:
|
||
- 使用客观、准确的描述
|
||
- 避免推测性语言(如"可能"、"也许")
|
||
- 控制在200字以内
|
||
- 如果看不清具体时间,请明确说明"图例显示时间信息但具体数值不清晰"
|
||
|
||
示例输出格式:
|
||
"飞行共有X个架次:架次1(红色):东西向飞行,时间范围XX:XX-XX:XX架次2(蓝色):南北向飞行,时间范围XX:XX-XX:XX
|
||
...
|
||
各架次轨迹分布合理,覆盖了目标水体区域。"""
|
||
|
||
result = self._ai_chat(
|
||
self.ollama_vision_model,
|
||
"你是一位专业的航空摄影测量和遥感专家,擅长分析航线规划图。",
|
||
analysis_prompt,
|
||
Path(flight_img_path)
|
||
)
|
||
|
||
# 如果返回内容为空或太短,使用默认文本
|
||
if not result or len(result) < 20:
|
||
return "根据航线图分析,图中包含多个架次的飞行轨迹,各架次采用不同颜色标识,飞行方向各异,图例中标注了各架次的起始和结束时间。"
|
||
|
||
return result
|
||
|
||
except Exception as e:
|
||
return f"AI分析失败: {str(e)}。根据航线规划图,包含多个架次的飞行轨迹,各架次具有不同颜色和飞行方向,图例中标注了时间信息。"
|
||
|
||
def _analyze_water_mask_overlay(self, water_mask_path: str) -> str:
|
||
"""
|
||
使用AI分析水体区域识别图
|
||
|
||
分析内容:
|
||
1. 水体的分布情况(集中分布还是分散分布)
|
||
2. 水体的位置和形状特征
|
||
3. 从图像标注中提取的水域面积和占比
|
||
"""
|
||
if not self.enable_ai_analysis:
|
||
return "AI分析已禁用。根据水体区域识别图,蓝色半透明区域标识了水域范围,可观察到水体的分布情况和面积占比。"
|
||
|
||
try:
|
||
if not Path(water_mask_path).exists():
|
||
return "水体区域识别图文件不存在,无法进行分析。"
|
||
|
||
analysis_prompt = """【背景说明】
|
||
这是一座水库的遥感影像,水体区域以蓝色半透明标识。水库通常是人工筑坝蓄水形成,具有以下典型特征:
|
||
- 水体形态:较宽阔,形状相对规则,边界平滑
|
||
- 大坝位置:通常位于水库最窄处或下游方向
|
||
- 入库方向:上游河流汇入处,通常较窄或有分叉
|
||
- 出水方向:大坝方向,水体在此处收窄
|
||
|
||
【分析维度】
|
||
1. 水体整体形态:描述水库的形状(扇形、狭长形、不规则形、分叉形等),水体是集中还是分散?
|
||
2. 入库特征(重要):识别水体哪些位置有狭窄的入口或分叉——这些通常是河流入库的方向。描述入库位置(如东北角、西侧等)。
|
||
3. 大坝/出水方向推断(重要):根据水体形态,判断大坝最可能的位置。通常在水体最窄处、或水体延伸的末端。推断流向是“从XX方向流向大坝(XX方向)”。
|
||
4. 分支情况:是否有多个入库河流?是否有孤立水体?
|
||
5. 面积信息:从图像左上角标注中提取水域面积、影像总面积、水域占比。
|
||
|
||
【输出格式】
|
||
水体面积X.XX km² ,占比: X.X% ,形态: X。入库方向:XX方向(若有多个,依次列出)。出水/大坝方向:XX方向。流向推断:水体从XX方向汇入,流向大坝(XX方向)补充描述:[简要描述整体分布和形态特征]
|
||
|
||
【示例输出】
|
||
水体面积25.60 km² ,占比: 42.3% ,形态: 扇形分叉。入库方向:西北角和东北角各有狭窄水道汇入,为主要入库河流。出水/大坝方向:南侧水体最窄处。流向推断:水体从西北和东北两个方向汇入,向南侧大坝方向流动。补充描述:水库整体呈扇形,库区宽阔,有两个明显入库分支,符合山区水库典型特征"""
|
||
|
||
result = self._ai_chat(
|
||
self.ollama_vision_model,
|
||
"你是一位专业的水体遥感分析专家,擅长解读水体掩膜图和水域分布特征。",
|
||
analysis_prompt,
|
||
Path(water_mask_path)
|
||
)
|
||
|
||
# 如果返回内容为空或太短,使用默认文本
|
||
if not result or len(result) < 20:
|
||
return "根据水体区域识别图分析,蓝色半透明区域标识了水域范围。从图像标注可读取水域面积、影像总面积及水域占比信息,水体分布特征明显,便于后续水质参数反演分析。"
|
||
|
||
return result
|
||
|
||
except Exception as e:
|
||
return f"AI分析失败: {str(e)}。根据水体区域识别图,蓝色半透明区域标识了水域范围,图像左上角标注了水域面积、影像总面积及水域占比数据。"
|
||
|
||
def _analyze_sampling_distribution(self, sampling_map_path: str) -> str:
|
||
"""
|
||
使用AI分析采样点分布图
|
||
|
||
分析内容:
|
||
1. 采样点数量
|
||
2. 采样点在水体中的分布情况(均匀/集中、覆盖范围)
|
||
3. 采样点的空间分布特征
|
||
4. 对水质反演代表性的评估
|
||
"""
|
||
if not self.enable_ai_analysis:
|
||
return "AI分析已禁用。根据采样点分布图,红色点标识了采样点位置,可观察采样点在水体中的分布情况和覆盖范围。"
|
||
|
||
try:
|
||
if not Path(sampling_map_path).exists():
|
||
return "采样点分布图文件不存在,无法进行分析。"
|
||
|
||
analysis_prompt = """请详细分析这张采样点分布图,并严格按照以下要求输出:
|
||
|
||
【分析要求】
|
||
1. 采样点数量:估算图中有多少个采样点(红色点)
|
||
2. 分布情况:描述采样点在水体中的分布是否均匀,是否有聚集或稀疏区域
|
||
3. 覆盖范围:采样点是否覆盖了主要水域,是否有未覆盖的区域
|
||
4. 空间特征:采样点分布在哪些方位(如上下游、左右岸等)
|
||
5. 代表性评估:简要评价当前采样点布局对水质参数反演的代表性
|
||
|
||
【输出格式要求】
|
||
- 使用客观、准确的描述
|
||
- 避免推测性语言
|
||
- 控制在200字以内
|
||
|
||
【示例输出格式】
|
||
"图中共有约XX个采样点,分布...,覆盖...,在...区域较为密集,...区域较为稀疏。
|
||
采样点整体覆盖了主要水体区域,但在...区域采样不足。
|
||
当前布局对水质反演具有较好的代表性,建议..."
|
||
|
||
请根据图像内容给出专业分析。"""
|
||
|
||
result = self._ai_chat(
|
||
self.ollama_vision_model,
|
||
"你是一位专业的水质采样设计专家,擅长评估采样点布局的合理性和代表性。",
|
||
analysis_prompt,
|
||
Path(sampling_map_path)
|
||
)
|
||
|
||
# 如果返回内容为空或太短,使用默认文本
|
||
if not result or len(result) < 20:
|
||
return "根据采样点分布图分析,红色点标识了采样点位置,分布在水体各个区域。采样点覆盖范围较广,空间布局合理,能够较好地代表整体水质状况,为后续水质参数反演提供了可靠的数据基础。"
|
||
|
||
return result
|
||
|
||
except Exception as e:
|
||
return f"AI分析失败: {str(e)}。根据采样点分布图,红色点标识了采样点位置,分布在水体中,覆盖了主要水域区域,具有较好的代表性。"
|
||
|
||
def _setup_header_and_footer(self, section):
    """Set up the page header (logo + title text) and the page-number footer.

    The header is rebuilt from scratch as one left-aligned paragraph holding
    the logo (or a "■ " placeholder) followed by the title text. The footer
    shows "<PAGE> / <NUMPAGES> 页", centred, built from Word field codes.

    Args:
        section: The python-docx section whose header and footer are set.
    """
    def _append_field(run, instruction):
        """Append a begin / instrText / end field sequence to *run*."""
        begin = OxmlElement('w:fldChar')
        begin.set(qn('w:fldCharType'), 'begin')
        instr = OxmlElement('w:instrText')
        instr.text = instruction
        end = OxmlElement('w:fldChar')
        end.set(qn('w:fldCharType'), 'end')
        for element in (begin, instr, end):
            run._element.append(element)

    header = section.header

    # Drop whatever paragraphs the header already has.
    for paragraph in header.paragraphs:
        element = paragraph._element
        element.getparent().remove(element)

    header_para = header.add_paragraph()

    # 1. Logo at the far left; a text marker stands in when it is unavailable.
    header_img_path = get_resource_path("data/icons/word/lica.png")
    if os.path.isfile(header_img_path):
        try:
            run_img = header_para.add_run()
            run_img.add_picture(str(header_img_path), width=Inches(1.6))
        except Exception as e:
            print(f"页眉图片加载失败: {e}")
            header_para.add_run("■ ")
    else:
        header_para.add_run("■ ")

    # 2. Title text after the logo.
    run_text = header_para.add_run("                         水质参数报告")
    run_text.font.name = self.chinese_font
    run_text.font.size = Pt(11)
    run_text._element.rPr.rFonts.set(qn('w:eastAsia'), self.chinese_font)

    header_para.alignment = WD_ALIGN_PARAGRAPH.LEFT
    section.header_distance = Cm(0.8)

    # Footer: reuse the first paragraph when there is one.
    footer = section.footer
    footer_para = footer.paragraphs[0] if footer.paragraphs else footer.add_paragraph()
    # Start from an empty paragraph so a repeated call does not stack a
    # second set of page-number fields onto the first (the header above is
    # reset the same way).
    footer_para.clear()
    footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    _append_field(footer_para.add_run(), 'PAGE')
    footer_para.add_run(' / ')
    _append_field(footer_para.add_run(), 'NUMPAGES')
    footer_para.add_run(' 页')

    # Footer font: 9 pt in the configured Chinese font.
    for run in footer_para.runs:
        run.font.size = Pt(9)
        run.font.name = self.chinese_font
        run_props = run._element.rPr
        if run_props is not None and run_props.rFonts is not None:
            run_props.rFonts.set(qn('w:eastAsia'), self.chinese_font)

def _add_result_analysis_section(
|
||
self,
|
||
doc,
|
||
vis_dir: Path,
|
||
start_figure_num: int = 1,
|
||
all_image_analyses: Optional[List[Dict[str, Any]]] = None,
|
||
progress=None,
|
||
) -> int:
|
||
"""添加结果分析章节 - 统计表格 + 相关性热力图(热力图在表格下方)"""
|
||
h1 = doc.add_heading("4 结果分析", level=1)
|
||
self._style_heading(h1, level=1)
|
||
|
||
# 1. 添加统计分析表格(带编号)
|
||
h2 = doc.add_heading("4.1 水质参数统计分析", level=2)
|
||
self._style_heading(h2, level=2)
|
||
|
||
# 从工作目录的4_processed_data文件夹查找CSV文件
|
||
work_dir_path = vis_dir.parent
|
||
processed_data_dir = work_dir_path / "4_processed_data"
|
||
|
||
if not processed_data_dir.exists():
|
||
doc.add_paragraph(f"未找到数据处理目录: {processed_data_dir}")
|
||
doc.add_page_break()
|
||
return start_figure_num
|
||
|
||
csv_files = list(processed_data_dir.glob("*.csv"))
|
||
if not csv_files:
|
||
doc.add_paragraph(f"在 {processed_data_dir} 目录下未找到CSV统计数据文件。")
|
||
doc.add_page_break()
|
||
return start_figure_num
|
||
|
||
csv_path = csv_files[0] # 使用找到的第一个CSV文件
|
||
|
||
try:
|
||
df_full = pd.read_csv(csv_path, sep=',')
|
||
df = df_full.iloc[:, 2:] # 跳过前两列(纬度、经度),直接用列号
|
||
|
||
# 自动统计剩余列
|
||
stats_data = []
|
||
for i in range(df.shape[1]):
|
||
col = df.columns[i]
|
||
clean_col = str(col).strip()
|
||
try:
|
||
data = df.iloc[:, i].dropna()
|
||
if len(data) > 0:
|
||
stats_data.append({
|
||
'参数': clean_col,
|
||
'点位数': len(data),
|
||
'最大值': f"{data.max():.4f}",
|
||
'最小值': f"{data.min():.4f}",
|
||
'平均值': f"{data.mean():.4f}",
|
||
'标准差': f"{data.std():.4f}"
|
||
})
|
||
except Exception as e:
|
||
print(f"跳过列 {col}: {e}")
|
||
|
||
if stats_data:
|
||
# 创建统计表格
|
||
table = doc.add_table(rows=1, cols=6, style='Table Grid')
|
||
hdr_cells = table.rows[0].cells
|
||
hdr_cells[0].text = '参数'
|
||
hdr_cells[1].text = '点位数'
|
||
hdr_cells[2].text = '最大值'
|
||
hdr_cells[3].text = '最小值'
|
||
hdr_cells[4].text = '平均值'
|
||
hdr_cells[5].text = '标准差'
|
||
|
||
for stat in stats_data:
|
||
row_cells = table.add_row().cells
|
||
row_cells[0].text = stat['参数']
|
||
row_cells[1].text = str(stat['点位数'])
|
||
row_cells[2].text = stat['最大值']
|
||
row_cells[3].text = stat['最小值']
|
||
row_cells[4].text = stat['平均值']
|
||
row_cells[5].text = stat['标准差']
|
||
|
||
else:
|
||
doc.add_paragraph("CSV文件中未找到有效的参数数据。")
|
||
|
||
except Exception as e:
|
||
doc.add_paragraph(f"读取CSV文件时出错: {str(e)}")
|
||
#增加空格
|
||
doc.add_paragraph()
|
||
# 表格生成完成后,添加 AI 分析
|
||
if stats_data:
|
||
analysis_text = self._analyze_statistics(stats_data, [s['参数'] for s in stats_data])
|
||
self._add_ai_analysis_paragraph(doc, analysis_text)
|
||
|
||
doc.add_paragraph() # 表格和热力图之间的空行
|
||
|
||
# 2. 添加相关性热力图(放在表格下方)
|
||
h3 = doc.add_heading("4.2 水质参数相关性分析", level=2)
|
||
self._style_heading(h3, level=2)
|
||
heatmap_path = vis_dir / "correlation_heatmap.png"
|
||
figure_num = start_figure_num
|
||
if heatmap_path.exists():
|
||
try:
|
||
# 使用统一的图像插入方法
|
||
caption_text = f"图{figure_num} 水质参数相关性热力图"
|
||
self._add_image_with_caption(doc, str(heatmap_path), caption_text, width=Inches(6.0))
|
||
doc.add_paragraph("(颜色越深表示相关性越强,红色为正相关,蓝色为负相关)")
|
||
|
||
analysis_text = self._analyze_and_cache_image(
|
||
image_path=heatmap_path,
|
||
image_type="correlation_heatmap",
|
||
param="综合",
|
||
figure_num=figure_num,
|
||
)
|
||
self._add_ai_analysis_paragraph(doc, analysis_text)
|
||
if all_image_analyses is not None:
|
||
all_image_analyses.append(
|
||
{
|
||
"figure_num": figure_num,
|
||
"param": "综合",
|
||
"image_type": "correlation_heatmap",
|
||
"image_name": heatmap_path.name,
|
||
"analysis": analysis_text,
|
||
}
|
||
)
|
||
except Exception as e:
|
||
doc.add_paragraph(f"[相关性热力图插入失败: {e}]")
|
||
else:
|
||
doc.add_paragraph(f"[未找到相关性热力图: {heatmap_path.name}]")
|
||
|
||
# 热力图处理结束(无论成功/失败)更新进度条
|
||
try:
|
||
if progress is not None:
|
||
progress.update(1)
|
||
except Exception:
|
||
pass
|
||
|
||
doc.add_page_break()
|
||
return start_figure_num + (1 if heatmap_path.exists() else 0)
|
||
|
||
def _add_physical_inversion_section(
    self,
    doc: Document,
    work_dir: Path,
    start_figure_num: int = 1,
    all_image_analyses: Optional[List[Dict[str, Any]]] = None,
    progress=None,
) -> int:
    """Add the physical-model concentration section (table plus charts).

    Reads ``9_Concentration/statistics_summary.csv`` into a table and then
    inserts every image found in ``9_Concentration/charts``. When the
    ``9_Concentration`` folder is missing, a single note is written and
    nothing else is added.

    Args:
        doc: The python-docx document being assembled.
        work_dir: Work directory that contains ``9_Concentration``.
        start_figure_num: Number to use for the first inserted chart.
        all_image_analyses: Optional list; when given, each inserted chart
            is analysed and a dict describing it is appended.
        progress: Optional object with an ``update(step)`` method, advanced
            once per chart file.

    Returns:
        The next free figure number.
    """
    def _apply_cell_font(cell, bold=False):
        """Apply the table font (configured Chinese font, 10 pt) to *cell*."""
        for run in cell.paragraphs[0].runs:
            run.font.name = self.chinese_font
            run.font.size = Pt(10)
            if bold:
                run.font.bold = True
            run._element.rPr.rFonts.set(qn('w:eastAsia'), self.chinese_font)

    conc_dir = work_dir / "9_Concentration"
    if not conc_dir.is_dir():
        doc.add_paragraph("[物理反演浓度章节:9_Concentration 目录不存在,已跳过]")
        return start_figure_num

    stats_csv = conc_dir / "statistics_summary.csv"
    charts_dir = conc_dir / "charts"

    # NOTE(review): "4.1" is also used by the statistics heading in
    # _add_result_analysis_section; confirm the intended numbering.
    h = doc.add_heading("4.1 物理模型反演浓度统计与分析", level=2)
    self._style_heading(h, level=2)

    fig_num = start_figure_num

    # Statistics table: one header row, then one row per CSV record.
    if stats_csv.is_file():
        try:
            stats_df = pd.read_csv(stats_csv)
            table = doc.add_table(rows=1, cols=len(stats_df.columns))
            table.style = "Table Grid"
            hdr_cells = table.rows[0].cells
            for i, col_name in enumerate(stats_df.columns):
                hdr_cells[i].text = str(col_name)
                _apply_cell_font(hdr_cells[i], bold=True)
            for _, row_data in stats_df.iterrows():
                row_cells = table.add_row().cells
                for i, val in enumerate(row_data):
                    row_cells[i].text = str(val)
                    _apply_cell_font(row_cells[i])
            doc.add_paragraph()
        except Exception as e:
            doc.add_paragraph(f"[浓度统计表插入失败: {e}]")
    else:
        doc.add_paragraph("[浓度统计表不存在: statistics_summary.csv]")

    # Charts: grouped by extension, sorted by name within each group.
    if charts_dir.is_dir():
        image_extensions = ['*.png', '*.jpg', '*.jpeg', '*.tif', '*.tiff']
        chart_files: List[Path] = []
        for ext in image_extensions:
            chart_files.extend(sorted(charts_dir.glob(ext)))
        for chart_file in chart_files:
            caption_text = f"图{fig_num} {chart_file.stem} 分布图"
            if self._add_image_with_caption(doc, str(chart_file), caption_text, width=Inches(5.5)):
                # NOTE(review): the AI paragraph is only written when the
                # caller collects analyses; sibling sections always write it.
                if all_image_analyses is not None:
                    image_type = "boxplot" if "boxplot" in chart_file.stem.lower() else "distribution"
                    analysis_text = self._analyze_and_cache_image(
                        image_path=chart_file,
                        image_type=image_type,
                        param=chart_file.stem,
                        figure_num=fig_num,
                    )
                    self._add_ai_analysis_paragraph(doc, analysis_text)
                    all_image_analyses.append({
                        "figure_num": fig_num,
                        "param": chart_file.stem,
                        "image_type": image_type,
                        "image_name": chart_file.name,
                        "analysis": analysis_text,
                    })
                # Only charts that were actually inserted consume a number.
                fig_num += 1
            try:
                if progress is not None:
                    progress.update(1)
            except Exception:
                # Progress reporting is cosmetic; never let it break the report.
                pass
    else:
        doc.add_paragraph("[浓度图表目录不存在: 9_Concentration/charts/]")

    return fig_num

# ==================== Usage example ====================


def generate_full_water_quality_report(
    work_dir: str = "./work_dir",
    ai_config: Optional[ReportGenerationConfig] = None,
):
    """Generate the full report covering all water-quality parameters.

    Args:
        work_dir: Work directory handed to the generator and to
            ``generate_report``.
        ai_config: Optional report/AI configuration passed to the generator.

    Returns:
        Whatever ``WaterQualityReportGenerator.generate_report`` returns
        (the ``__main__`` block prints it as the report path).
    """
    generator = WaterQualityReportGenerator(work_dir=work_dir, ai_config=ai_config)
    return generator.generate_report(
        work_dir=work_dir,
        parameters=None,
        report_title="水质参数反演分析完整报告",
    )


if __name__ == "__main__":
    # Script entry point: build the full report with default settings.
    report_path = generate_full_water_quality_report()
    print(f"完整水质报告已生成: {report_path}")