Initial commit: GasFlux project with core processing pipelines
This commit is contained in:
108
tools/mono_flight_splitter.py
Normal file
108
tools/mono_flight_splitter.py
Normal file
@ -0,0 +1,108 @@
|
||||
"""a script to split a folder of flights into multiple flights based on the monotonic changes in altitude.
|
||||
Takes a csv file as an input and will split it out into several csvs.
|
||||
Needs course azimuths to work properly.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import colorama
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from src.gasflux import processing
|
||||
|
||||
colorama.init()
|
||||
|
||||
|
||||
def main(target_dir, search_string, filter_mask, output_dir=None):
|
||||
df_list = {}
|
||||
for file_path in Path(target_dir).rglob(search_string):
|
||||
df = pd.read_csv(file_path)
|
||||
df_list[file_path] = df
|
||||
|
||||
for file_path, df in df_list.items():
|
||||
print(colorama.Fore.WHITE + "----------------------------------------")
|
||||
if filter_mask in df.columns: # check for boolean mask columns
|
||||
df = df[~df[filter_mask]].reset_index(drop=True)
|
||||
df, groupdict = processing.monotonic_transect_groups(df)
|
||||
last_transect = None
|
||||
last_group_trend = None
|
||||
for group in df["group"].unique():
|
||||
# leaving this out for now as the group folder is a good identifier
|
||||
# if len(df['group'].unique()) == 1:
|
||||
# print(colorama.Fore.RED + f'Skipping{file_path.name} - only one group')
|
||||
# continue
|
||||
group_df = df[df["group"] == group]
|
||||
avg_altitudes = group_df.groupby("transect_num")["height_ato"].mean().values
|
||||
avg_change = (
|
||||
sum([avg_altitudes[i + 1] - avg_altitudes[i] for i in range(len(avg_altitudes) - 1)])
|
||||
/ len(avg_altitudes)
|
||||
if len(avg_altitudes) > 1
|
||||
else 0
|
||||
)
|
||||
# what's the trend
|
||||
current_group_trend = "ascending" if avg_change > 0 else "descending"
|
||||
# add last transect from previous group if there's a change in trend
|
||||
if last_group_trend and last_group_trend != current_group_trend and last_transect is not None:
|
||||
group_df = pd.concat([last_transect, group_df])
|
||||
avg_altitudes = group_df.groupby("transect_num")["height_ato"].mean().values
|
||||
avg_altitudes = np.array(avg_altitudes)
|
||||
# check if the group is monotonic
|
||||
is_monotonic = np.all(np.diff(avg_altitudes) > 0) or np.all(np.diff(avg_altitudes) < 0)
|
||||
if not is_monotonic: # exception
|
||||
print(f"group {group} is not monotonic - check the code!")
|
||||
formatted_avg_altitudes = ", ".join([f"{alt:.1f}" for alt in avg_altitudes])
|
||||
# do it where transect is the maximum transect number
|
||||
last_transect = group_df[group_df["transect_num"] == group_df["transect_num"].max()].copy()
|
||||
last_transect.loc[:, "transect_num"] = 0
|
||||
last_group_trend = current_group_trend
|
||||
unique_transects = len(group_df["transect_num"].unique())
|
||||
if unique_transects < 3:
|
||||
print(
|
||||
colorama.Fore.RED + f"Not saving {group} from {file_path} - too few transects"
|
||||
f"({unique_transects} transects at {formatted_avg_altitudes}m)"
|
||||
)
|
||||
else:
|
||||
file_path = Path(file_path)
|
||||
if output_dir:
|
||||
output_path = Path(
|
||||
Path(output_dir)
|
||||
/ f"{file_path.parents[1].name}"
|
||||
/ f"{file_path.parent.name}_{group}"
|
||||
/ f"{file_path.stem}_{group}.csv"
|
||||
)
|
||||
else:
|
||||
# assuming in date and time folder
|
||||
output_path = Path(
|
||||
file_path.parent.parent.parent # time -> date -> analysis
|
||||
/ "splits"
|
||||
/ file_path.parent.parent.name # date
|
||||
/ f"{file_path.parent.name}_{group}" # time + group
|
||||
/ f"{file_path.stem}_{group}.csv"
|
||||
)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
group_df.to_csv(output_path, index=False)
|
||||
print(
|
||||
colorama.Fore.GREEN
|
||||
+ f'wrote {unique_transects} monotonic transects at {formatted_avg_altitudes}m to \n'
|
||||
f'{"/".join(output_path.parts[-5:])}'
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--target-dir", help="the directory to search for csvs", default="survey")
|
||||
parser.add_argument("--search-string", help="the string to search for in the directory", default="*filtered.csv")
|
||||
parser.add_argument(
|
||||
"--filter-mask", help="name of a column with a boolean filter; TRUE means discard", default="filtered"
|
||||
)
|
||||
parser.add_argument("--output-dir", help="the metadirectory to save the date/time/csvs to", required=False)
|
||||
|
||||
args = parser.parse_args()
|
||||
main(
|
||||
args.target_dir,
|
||||
args.search_string,
|
||||
args.filter_mask,
|
||||
args.output_dir,
|
||||
)
|
||||
Reference in New Issue
Block a user