第一次提交:用于根河用户towersif原始数据分割
1. 任总:csv中的2个光谱仪分别有6个位置的数据,位置1、3、5实际上都是测的光纤位置1的数据(测天空),所以分割后都改为P1; 2. 添加了界面分别输入原始数据文件夹和输出文件夹,在输出文件夹中新建文件夹split1、split2和split3用于保存分割后的数据;
This commit is contained in:
166
.gitignore
vendored
Normal file
166
.gitignore
vendored
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
# 唐超添加
|
||||||
|
/.idea
|
||||||
|
*.zip
|
||||||
|
/Data
|
||||||
|
Data_split
|
||||||
|
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/#use-with-ide
|
||||||
|
.pdm.toml
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
#.idea/
|
14
main.py
Normal file
14
main.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
import easygui

from towerDataSplit import DataSplit


# GUI entry point: ask the user for the raw towersif data folder and the
# output folder, then run the split.  ``diropenbox`` returns ``None`` when
# the user cancels the dialog, so guard against handing ``None`` paths to
# DataSplit (which would crash inside ``os.listdir``).
inputDataPath = easygui.diropenbox('第1/2步', '选择towersif原始数据文件夹')
outputDataPath = easygui.diropenbox('第2/2步', '选择输出文件夹')

# inputDataPath = r"D:\PycharmProjects\towerDataSplit\sampleData"
# outputDataPath = r"D:\PycharmProjects\towerDataSplit\Data_split"

if inputDataPath is None or outputDataPath is None:
    # User cancelled one of the dialogs -- inform and exit cleanly.
    easygui.msgbox("未选择文件夹,程序退出。", "提示")
else:
    data_split = DataSplit(inputDataPath, outputDataPath)
    data_split.start_split_process()

    easygui.msgbox("数据分割完成!", "提示")
|
19
sampleData/2022_09_03/genhe_06_44_44.csv
Normal file
19
sampleData/2022_09_03/genhe_06_44_44.csv
Normal file
File diff suppressed because one or more lines are too long
19
sampleData/2022_09_03/genhe_07_07_02.csv
Normal file
19
sampleData/2022_09_03/genhe_07_07_02.csv
Normal file
File diff suppressed because one or more lines are too long
19
sampleData/2022_09_04/genhe_06_45_25.csv
Normal file
19
sampleData/2022_09_04/genhe_06_45_25.csv
Normal file
File diff suppressed because one or more lines are too long
19
sampleData/2022_09_04/genhe_07_04_33.csv
Normal file
19
sampleData/2022_09_04/genhe_07_04_33.csv
Normal file
File diff suppressed because one or more lines are too long
19
sampleData/2022_09_05/genhe_06_43_52.csv
Normal file
19
sampleData/2022_09_05/genhe_06_43_52.csv
Normal file
File diff suppressed because one or more lines are too long
19
sampleData/2022_09_05/genhe_07_08_22.csv
Normal file
19
sampleData/2022_09_05/genhe_07_08_22.csv
Normal file
File diff suppressed because one or more lines are too long
19
sampleData/2022_09_06/genhe_06_43_48.csv
Normal file
19
sampleData/2022_09_06/genhe_06_43_48.csv
Normal file
File diff suppressed because one or more lines are too long
19
sampleData/2022_09_06/genhe_07_06_59.csv
Normal file
19
sampleData/2022_09_06/genhe_07_06_59.csv
Normal file
File diff suppressed because one or more lines are too long
121
towerDataSplit.py
Normal file
121
towerDataSplit.py
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
import time
|
||||||
|
import math
|
||||||
|
import argparse
|
||||||
|
import copy
|
||||||
|
|
||||||
|
|
||||||
|
class DataSplit:
    """Split raw towersif CSV files into per-position CSV files.

    Each raw CSV holds measurements from two spectrometers at six
    positions.  The rows are regrouped into three output files saved
    under ``<outputPath>/split1``, ``/split2`` and ``/split3``,
    mirroring the per-day directory layout of the input folder.
    """

    def __init__(self, inputDataPath, outputDataPath):
        # Root folder holding per-day subfolders of raw CSV files.
        self.inputPath = inputDataPath
        # Root folder under which the split1/split2/split3 trees are created.
        self.outputPath = outputDataPath

    def create_directory(self, directory):
        """Create *directory* (and any missing parents) if it does not exist.

        Uses ``exist_ok=True`` so a directory created between a check and
        the call cannot raise -- this replaces the original
        check-then-create (``os.path.exists`` + ``makedirs``) race.

        :param directory: path of the directory to create (deep paths like
            ``/A/B/C/D`` are created in one call)
        :return: None
        """
        os.makedirs(directory, exist_ok=True)

    def read_data(self, file_path):
        """Read one raw CSV and patch its metadata for the split files.

        The file has a ragged layout, so it is first read with a generous
        guessed column count and then trimmed to the real width, derived
        from the channel counts stored at rows 2 and 4, column 6.

        :param file_path: path of the raw CSV file
        :return: trimmed and patched ``pandas.DataFrame``
        """
        guess_column_number = 10000

        df = pd.read_csv(file_path, header=None, sep=',', names=range(guess_column_number))
        # Real width = widest spectrometer channel count + 3 metadata columns.
        correct_column_number = max(df.iloc[2, 6], df.iloc[4, 6]) + 3

        df.drop(list(range(int(correct_column_number), guess_column_number)), axis=1,
                inplace=True)  # inplace=True mutates df, no copy is made

        # After splitting, each output file holds data for 2 positions.
        df.iloc[1, 3] = 2

        # Per the client: the rows labelled P3 and P5 were actually measured
        # at fibre position 1 (the sky was sampled twice to shorten the
        # acquisition), so relabel them as P1.
        # NOTE(review): str.replace substitutes EVERY matching character in
        # the cell, not just the position digit -- assumed safe because these
        # cells are short labels like "P3"/"P5"; confirm against real data.
        df.iloc[9, 0] = df.iloc[9, 0].replace("3", "1")
        df.iloc[11, 0] = df.iloc[11, 0].replace("5", "1")
        df.iloc[15, 0] = df.iloc[15, 0].replace("3", "1")
        df.iloc[17, 0] = df.iloc[17, 0].replace("5", "1")

        return df

    def split_data(self, df):
        """Split the patched dataframe into the three per-position frames.

        Rows 0-6 (shared header/metadata) are copied into every output;
        the remaining rows are distributed by measurement position.

        :param df: dataframe returned by :meth:`read_data`
        :return: tuple ``(df1, df2, df3)`` of the three split frames
        """
        header_rows = [0, 1, 2, 3, 4, 5, 6]
        df1 = df.iloc[header_rows + [7, 8, 13, 14]]
        df2 = df.iloc[header_rows + [9, 10, 15, 16]]
        df3 = df.iloc[header_rows + [11, 12, 17, 18]]

        return df1, df2, df3

    def write_data(self, df, file_path):
        """Write one split dataframe to *file_path* as a headerless CSV.

        Known quirk: pandas may render some integers with a trailing ``.0``
        (e.g. 77777 -> 77777.0); the downstream C# (easysif) reader must
        parse such fields via double before converting to int.

        :param df: dataframe to write
        :param file_path: destination CSV path
        :return: None
        """
        df.to_csv(file_path, index=False, header=False)

    def start_split_process(self):
        """Main driver: walk the input tree and write the split CSV files.

        :return: 0 on completion
        """
        time_start = time.time()  # record start time

        directories = os.listdir(self.inputPath)
        # NOTE(review): populated nowhere in this class; kept so any external
        # reader of the attribute keeps working.
        self.validFiles = []

        for directory in directories:
            # Skip stray files at the top level; only per-day folders are
            # expected, and os.listdir on a file would raise.
            if not os.path.isdir(os.path.join(self.inputPath, directory)):
                continue

            directory1_tmp = os.path.join(self.outputPath, "split1", directory)
            directory2_tmp = os.path.join(self.outputPath, "split2", directory)
            directory3_tmp = os.path.join(self.outputPath, "split3", directory)
            self.create_directory(directory1_tmp)
            self.create_directory(directory2_tmp)
            self.create_directory(directory3_tmp)

            files = os.listdir(os.path.join(self.inputPath, directory))
            for file in files:
                file_path = os.path.join(self.inputPath, directory, file)
                if os.path.splitext(file_path)[1] != '.csv':
                    continue  # only CSV files are split

                df = self.read_data(file_path)
                df1, df2, df3 = self.split_data(df)

                self.write_data(df1, os.path.join(directory1_tmp, file))
                self.write_data(df2, os.path.join(directory2_tmp, file))
                self.write_data(df3, os.path.join(directory3_tmp, file))

        time_end = time.time()  # record end time
        time_sum = time_end - time_start  # elapsed wall-clock time in seconds
        print("处理用时:%d" % time_sum)

        return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: read the two folder paths from argv and run
    # the split (alternative to the GUI entry point in main.py).
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("inputDataPath", help="输入数据路径")
    arg_parser.add_argument("outputDataPath", help="输出路径。")
    cli_args = arg_parser.parse_args()

    splitter = DataSplit(cli_args.inputDataPath, cli_args.outputDataPath)
    splitter.start_split_process()

    print("completed!!")
|
Reference in New Issue
Block a user