From 7666368d18f790651d7821e3c1dad46f9acfbbb0 Mon Sep 17 00:00:00 2001 From: tangchao0503 <735056338@qq.com> Date: Wed, 20 Aug 2025 14:47:19 +0800 Subject: [PATCH] =?UTF-8?q?=E9=80=9A=E8=BF=87=E9=85=8D=E7=BD=AE=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E7=9B=91=E6=8E=A7=E7=9B=AE=E5=BD=95=EF=BC=8C=E5=BD=93?= =?UTF-8?q?=E5=87=BA=E7=8E=B0csv=E6=97=B6=E5=81=9A=E4=BB=A5=E4=B8=8B?= =?UTF-8?q?=E6=93=8D=E4=BD=9C=EF=BC=9A=201=E3=80=81=E8=BD=AC=E8=BE=90?= =?UTF-8?q?=E4=BA=AE=E5=BA=A6=EF=BC=9B=202=E3=80=81=E8=B0=83=E7=94=A8?= =?UTF-8?q?=E4=B8=B0=E7=AE=97=E6=B3=95=E8=AE=A1=E7=AE=97sif=EF=BC=8C?= =?UTF-8?q?=E8=BE=93=E5=87=BA=E6=88=90csv=EF=BC=9B=203=E3=80=81=E5=B0=862?= =?UTF-8?q?=E4=B8=AD=E7=9A=84csv=E4=B8=8A=E4=BC=A0=E5=88=B0ftp=E6=9C=8D?= =?UTF-8?q?=E5=8A=A1=E5=99=A8=EF=BC=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 244 ++++++++++++++++++++++++++++++++++++ config.ini | 9 ++ main.py | 362 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 615 insertions(+) create mode 100644 .gitignore create mode 100644 config.ini create mode 100644 main.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6116fde --- /dev/null +++ b/.gitignore @@ -0,0 +1,244 @@ +# tc +/.idea +test_data +tmp + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
def _pairs_to_dict(cells):
    """Map alternating name/value cells to a dict.

    A trailing name without a matching value is dropped; when a name occurs
    more than once, the last value wins.
    """
    return dict(zip(cells[0::2], cells[1::2]))


def parse_sif_csv(_file_path):
    """Parse a SIF data CSV file into metadata, wavelengths and spectra.

    Layout read by this function (row numbers are 1-based):

    * Row 1: the first cell is skipped; the remaining cells alternate
      between a variable name and its value.
    * Row 2: the first cell is a variable name, the second cell its value.
    * Row 3: same layout as row 1.
    * Row 4: the first cell is skipped; the remaining cells are wavelengths.
    * Row 5: ignored.
    * Row 6 onward: one spectrum per row, laid out as
      ``Location, Valid, Integration, DN...``.

    Header rows that are missing (empty or truncated file) are tolerated:
    the corresponding metadata is simply absent and the wavelength array is
    empty, instead of the call failing with an ``IndexError``.

    Args:
        _file_path: Path of the UTF-8 encoded CSV file to read.

    Returns:
        A tuple ``(metadata, wavelengths, spectra_data)`` where ``metadata``
        is a dict of string names to string values, ``wavelengths`` is a
        NumPy float array, and ``spectra_data`` is a list of dicts with the
        keys ``"Location"``, ``"Valid"``, ``"Integration"`` (int) and
        ``"DN"`` (NumPy float array).

    Raises:
        ValueError: If a cell in the wavelength row is not a valid float.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires, so quoted fields containing newlines and
    # "\r\n" terminated files are parsed correctly.
    with open(_file_path, encoding='utf-8', newline='') as f:
        rows = list(csv.reader(f))

    _metadata = {}

    # Row 1: skip the first cell, then name/value pairs.
    if len(rows) > 0:
        _metadata.update(_pairs_to_dict(rows[0][1:]))

    # Row 2: a single name/value pair in the first two cells.
    if len(rows) > 1 and len(rows[1]) >= 2:
        _metadata[rows[1][0]] = rows[1][1]

    # Row 3: skip the first cell, then name/value pairs.
    if len(rows) > 2:
        _metadata.update(_pairs_to_dict(rows[2][1:]))

    # Row 4: wavelengths (first cell skipped). An absent row yields an empty
    # array so the return type stays the same for every input.
    wavelength_cells = rows[3][1:] if len(rows) > 3 else []
    _wavelengths = np.array([float(w) for w in wavelength_cells], dtype=float)

    # Row 5 is ignored; spectra start at row 6.
    _spectra_data = []
    for row in rows[5:]:
        # Skip repeated header lines and measurements not flagged as valid.
        if len(row) < 4 or row[1].lower() != "valid":
            continue
        try:
            entry = {
                "Location": row[0],
                "Valid": row[1],
                "Integration": int(row[2]),
                "DN": np.array([float(val) for val in row[3:]]),
            }
        except ValueError:
            # Best effort: rows with non-numeric cells are dropped.
            continue
        _spectra_data.append(entry)

    return _metadata, _wavelengths, _spectra_data