Monitor a directory specified in a config file; when a CSV appears, do the following:

1. Convert DN to radiance;
2. Run Feng's algorithm to compute SIF and write the results to CSV;
3. Upload the CSVs from step 2 to the FTP server.
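
A minimal invocation, based on the argparse setup in main.py below (the config path is illustrative):

    python main.py -i config.ini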
commit 7666368d18
Author: tangchao0503
Date: 2025-08-20 14:47:19 +08:00

3 changed files with 615 additions and 0 deletions

.gitignore (vendored, new file, 244 lines)

@@ -0,0 +1,244 @@
# tc
/.idea
test_data
tmp
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

config.ini (new file, 9 lines)

@@ -0,0 +1,9 @@
[FTP]
host = 172.16.0.73
port = 22
user = ftpuser
password = 123
target_dir = /home/ftpuser/
[monitor]
WATCH_DIR = D:\PycharmProjects\sif_data_parse\test_data
cal_dir = D:\PycharmProjects\sif_data_parse\test_data\cal
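
Note: despite the [FTP] section name, main.py connects over SFTP (paramiko on port 22). A minimal sketch of how these keys are consumed, mirroring load_config in main.py:

    import configparser
    cfg = configparser.ConfigParser()
    cfg.read('config.ini')
    host = cfg['FTP']['host']                # '172.16.0.73'
    port = int(cfg['FTP'].get('port', 22))   # 22, i.e. SSH/SFTP rather than plain FTP
    watch_dir = cfg['monitor']['WATCH_DIR']  # directory handed to the watchdog observer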

main.py (new file, 362 lines)

@@ -0,0 +1,362 @@
import csv, tempfile, os, re
import struct
import time
import numpy as np
import argparse
import paramiko
import shutil
import configparser
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from datetime import datetime
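# Input CSV layout assumed by parse_sif_csv below (reconstructed from the parsing code):
#   row 1: <label>, name1, value1, name2, value2, ...  (alternating metadata)
#   row 2: name, value                                  (one metadata pair)
#   row 3: <label>, name1, value1, ...                  (alternating metadata)
#   row 4: <label>, wavelength_1, wavelength_2, ...
#   row 5: ignored
#   rows 6+: Location, Valid, Integration, DN_1, DN_2, ...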
def parse_sif_csv(_file_path):
_metadata = {}
_wavelengths = []
_spectra_data = []
with open(_file_path, encoding='utf-8') as f:
reader = csv.reader(f)
rows = list(reader)
# Row 1: after the first column, variable names and values alternate
row1 = rows[0][1:]
for i in range(0, len(row1), 2):
if i + 1 < len(row1):
_metadata[row1[i]] = row1[i + 1]
# Row 2: one name/value pair
if len(rows) > 1 and len(rows[1]) >= 2:
_metadata[rows[1][0]] = rows[1][1]
# Row 3: after the first column, variable names and values alternate
row3 = rows[2][1:]
for i in range(0, len(row3), 2):
if i + 1 < len(row3):
_metadata[row3[i]] = row3[i + 1]
# Row 4: wavelengths
_wavelengths = np.array([float(w) for w in rows[3][1:]])
# Row 5 is ignored
# Spectral data start at row 6
for row in rows[5:]:
if len(row) < 4 or row[1].lower() != "valid":
continue # skip header or invalid rows
try:
entry = {
"Location": row[0],
"Valid": row[1],
"Integration": int(row[2]),
"DN": np.array([float(val) for val in row[3:]])
}
_spectra_data.append(entry)
except ValueError:
continue # skip rows that fail to parse
return _metadata, _wavelengths, _spectra_data
def read_cal(_file_path):
# Struct layout:
# unsigned int (4) + float (4) + int (4) + 4096 floats (4 each) + 4096 doubles (8 each) + 4096 doubles (8 each)
fmt = '<I f i ' + '4096f' + '4096d' + '4096d' # little-endian
# Total byte size of the record
struct_size = struct.calcsize(fmt)
with open(_file_path, 'rb') as f:
data = f.read(struct_size)
unpacked = struct.unpack(fmt, data)
# Unpack the fields
_uiExposureTimeInMS = unpacked[0]
_fTemperature = unpacked[1]
_iPixels = unpacked[2]
_fWaveLength = np.array(unpacked[3:3+4096])[0:_iPixels]
_dCal_Gain = np.array(unpacked[3+4096 : 3+4096+4096])[0:_iPixels]
_dCal_Offset = np.array(unpacked[3+4096+4096:])[0:_iPixels]
return _uiExposureTimeInMS,_fTemperature,_iPixels,_fWaveLength,_dCal_Gain,_dCal_Offset
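# A hypothetical helper (not part of the pipeline) showing how a .cal file matching
# read_cal's layout might be produced for testing; all field values here are made up:
def write_cal_example(path, exposure_ms=100, temperature=25.0, pixels=2048):
    wavelengths = np.linspace(400.0, 1000.0, 4096)  # synthetic wavelength axis
    gain = np.ones(4096)                            # identity gain
    offset = np.zeros(4096)                         # zero offset
    with open(path, 'wb') as f:
        f.write(struct.pack('<I f i', exposure_ms, temperature, pixels))
        f.write(struct.pack('<4096f', *wavelengths))
        f.write(struct.pack('<4096d', *gain))
        f.write(struct.pack('<4096d', *offset))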
def write_file(_in_path, _out_path, _spectra_data):
with open(_in_path, encoding='utf-8') as f:
reader = csv.reader(f)
rows = list(reader)
with open(_out_path, 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
# Copy the first 4 rows (metadata and wavelengths) verbatim
writer.writerow(rows[0])
writer.writerow(rows[1])
writer.writerow(rows[2])
writer.writerow(rows[3])
writer.writerow(rows[4]) # row 5 copied as-is
# Write the processed spectra
for entry in _spectra_data:
row = [entry["Location"], entry["Valid"], entry["Integration"]] + list(entry["RAD"])
writer.writerow(row)
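# Calibration files are ordered by the numbers embedded in their names (e.g. cal_2
# sorts before cal_10, unlike plain lexicographic order); process_csv pairs the
# i-th spectrum row with the i-th calibration file returned here.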
def get_sorted_files_by_number(folder_path):
# List everything in the folder
all_items = os.listdir(folder_path)
# Keep files only (exclude directories) and convert to absolute paths
files = [
os.path.abspath(os.path.join(folder_path, item))
for item in all_items
if os.path.isfile(os.path.join(folder_path, item))
]
# Extract the numbers embedded in a file name (basename only)
def extract_numbers(filepath):
filename = os.path.basename(filepath) # file name without path
numbers = re.findall(r'\d+', filename) # all digit runs
return [int(num) for num in numbers] # as integers
# Sort files by the numbers in their names
sorted_files = sorted(files, key=extract_numbers)
return sorted_files
# Config loading helper
def load_config(config_path='config.ini'):
config = configparser.ConfigParser()
config.read(config_path)
return config
class CSVFileHandler(FileSystemEventHandler):
def __init__(self, ftp_config):
super().__init__()
self.ftp_config = ftp_config
def on_created(self, event):
if event.is_directory:
return
if event.src_path.lower().endswith('.csv'):
file_path = os.path.abspath(event.src_path)
print(f"发现CSV文件: {file_path}")
# 选择定标文件夹
cal_dir = self.ftp_config['monitor']['cal_dir']
a=1
if "towersif20" in file_path:
cal_dir = os.path.join(cal_dir,"20")
elif "towersif21" in file_path:
cal_dir = os.path.join(cal_dir,"21")
time.sleep(0.1) # 文件一出现就处理文件偶发permission deny所以等待100ms
_ = self.process_csv(file_path, cal_dir)
# 为csv添加有效性字段
for i in _:
self.add_validity_column_to_file(i)
self.send_via_sftp(_)
def send_via_sftp(self, file_paths, max_retries=3, retry_delay=5):
retries = 0
ssh = None
sftp = None
while retries < max_retries:
try:
print("正在尝试连接 SFTP 服务器...")
# 创建 SSH 客户端
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # 生产环境建议更安全的方式
# 连接参数(参考 FileZilla 的设置)
ssh.connect(
hostname=self.ftp_config['FTP']['host'],
port=int(self.ftp_config['FTP'].get('port', 22)),
username=self.ftp_config['FTP']['user'],
password=self.ftp_config['FTP']['password'],
timeout=30,
allow_agent=False, # 禁用 ssh-agent避免 'lost ssh-agent' 错误)
look_for_keys=False, # 不自动查找密钥(强制使用密码认证)
)
print("SFTP 连接成功,准备上传文件...")
sftp = ssh.open_sftp()
# Ensure the target directory exists and switch to it
remote_dir = self.ftp_config['FTP'].get('target_dir', '.')
try:
sftp.chdir(remote_dir) # try to enter the directory
except IOError:
print(f"Remote directory missing; creating it: {remote_dir}")
sftp.mkdir(remote_dir)
sftp.chdir(remote_dir)
# Upload each file
success_count = 0
for file_path in file_paths:
try:
filename = os.path.basename(file_path)
sftp.put(file_path, filename)
print(f"✅ 文件上传成功: {filename}")
success_count += 1
except Exception as e:
print(f"❌ 文件上传失败 {file_path}: {e}")
return success_count == len(file_paths) # 全部成功返回True否则False
except paramiko.AuthenticationException as e:
print(f"❌ 认证失败: {e}")
return False
except paramiko.SSHException as e:
retries += 1
print(f"❌ SFTP 连接失败(尝试 {retries}/{max_retries}: {e}")
if retries < max_retries:
time.sleep(retry_delay)
except Exception as e:
print(f"❌ 未知错误: {e}")
return False
finally:
if sftp:
sftp.close()
if ssh:
ssh.close()
print(f"❌ 上传失败(已达最大重试次数 {max_retries}")
return False
def process_csv(self, input_csv, input_cal):
# Derive paths and the dated output file name
folder_path = os.path.dirname(input_csv)
base_name = os.path.basename(input_csv) # file name with extension
name_part, ext = os.path.splitext(base_name) # split name and extension
parts = name_part.split('_', 1) # split at the first underscore
today = datetime.now()
formatted_date = today.strftime("%Y_%m_%d")
new_name = f"{formatted_date}_{parts[1]}{ext}" # build the new file name
tmp_folder = os.path.join(Path(self.ftp_config['monitor']['WATCH_DIR']).parent, "tmp")
if not os.path.exists(tmp_folder):
os.makedirs(tmp_folder)
print(f"文件夹已创建: {tmp_folder}")
else:
print(f"文件夹已存在: {tmp_folder}")
rad_folder = os.path.join(tmp_folder, "rad")
if os.path.exists(rad_folder):
shutil.rmtree(rad_folder)
os.makedirs(rad_folder)
sif_folder = os.path.join(tmp_folder, "sif")
if not os.path.exists(sif_folder):
os.makedirs(sif_folder)
print(f"文件夹已创建: {sif_folder}")
else:
print(f"文件夹已存在: {sif_folder}")
rad_path = os.path.join(rad_folder, new_name)
metadata, wavelengths, spectra_data = parse_sif_csv(input_csv)
sorted_cal_files_path = get_sorted_files_by_number(input_cal)
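# Radiance conversion: RAD = DN * dCal_Gain * (uiExposureTimeInMS / Integration),
# i.e. the calibration gain rescaled from the calibration exposure time to each
# spectrum's integration time (dCal_Offset is read but not applied here).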
for i in range(len(spectra_data)):
uiExposureTimeInMS, fTemperature, iPixels, fWaveLength, dCal_Gain, dCal_Offset = read_cal(
sorted_cal_files_path[i])
gain_scale = uiExposureTimeInMS / spectra_data[i]['Integration']
data_gain_adjust = dCal_Gain * gain_scale
spectra_data[i]['RAD'] = spectra_data[i]['DN'] * data_gain_adjust
write_file(input_csv, rad_path, spectra_data)
# Invoke Feng's SIF retrieval algorithm as an external script
if os.name == "nt": # Windows
program_path = r"python D:\PycharmProjects\sif\sif_retrieval.py"
standard_sif_path = r"C:\EasySIF\standard_sif.csv"
elif os.name == "posix": # Linux/macOS/Unix-like
program_path = r"python3 /root/sif/feng/sif_retrieval.py"
standard_sif_path = r"/root/sif/feng/standard_sif.csv"
input_path = rad_folder
file_name_tmp = parts[0] + "_" + new_name.split('.')[0]
output_path_3fld = os.path.join(sif_folder, file_name_tmp + "_3fld.csv")
param_3fld = r"[740,780],[756,759],[761,762] P1 3fld"
output_path_sfld = os.path.join(sif_folder, file_name_tmp + "_sfld.csv")
param_sfld = r"[740,780],[756,759] P1 sfld"
output_path_sfm = os.path.join(sif_folder, file_name_tmp + "_sfm.csv")
param_sfm = r" [759,770],760 P1 sfm"
command_str_3fld = program_path + " " + standard_sif_path + " " + input_path + " " + output_path_3fld + " " + param_3fld
command_str_sfld = program_path + " " + standard_sif_path + " " + input_path + " " + output_path_sfld + " " + param_sfld
command_str_sfm = program_path + " " + standard_sif_path + " " + input_path + " " + output_path_sfm + " " + param_sfm
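# Each assembled command has the shape:
#   <python sif_retrieval.py> <standard_sif.csv> <rad_dir> <output_csv> <fitting windows> P1 <method>
# with method = 3fld, sfld, or sfm and the bracketed wavelength windows given above.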
return_code_3fld = os.system(command_str_3fld)
return_code_sfld = os.system(command_str_sfld)
return_code_sfm = os.system(command_str_sfm)
print(f"Command exit statuses: {return_code_3fld}, {return_code_sfld}, {return_code_sfm}")
return output_path_3fld, output_path_sfld, output_path_sfm
def add_validity_column_to_file(self, file_path):
# Create a temp file
temp_file = tempfile.NamedTemporaryFile(mode='w', delete=False, newline='')
try:
with open(file_path, 'r', newline='') as csvfile, temp_file:
reader = csv.reader(csvfile)
writer = csv.writer(temp_file)
# Read all rows
rows = list(reader)
if len(rows) < 2:
temp_file.close()
os.unlink(temp_file.name) # too few rows: discard the temp file and leave the original untouched
return
# Append the validity column
rows[0].append('validity')
rows[1].append('1')
# Write everything to the temp file
writer.writerows(rows)
# Replace the original file with the temp file
shutil.move(temp_file.name, file_path)
except Exception as e:
# On error, delete the temp file
os.unlink(temp_file.name)
raise e
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Watch a folder; when a new CSV appears, retrieve SIF and upload the results via FTP.", prog='sif')
parser.add_argument('-i', '--input_ini', required=True, type=str, help='Path to the input .ini config file.')
parser.add_argument("-v", "--version", action='version', version='%(prog)s 1.0')
args = parser.parse_args()
ftp_config = load_config(args.input_ini)
event_handler = CSVFileHandler(ftp_config)
observer = Observer()
observer.schedule(event_handler, ftp_config['monitor']['WATCH_DIR'], recursive=True)
observer.start()
print(f"正在监控目录:{ftp_config['monitor']['WATCH_DIR']}")
try:
while True:
time.sleep(1)
except KeyboardInterrupt:
observer.stop()
observer.join()