109 lines
4.9 KiB
Python
109 lines
4.9 KiB
Python
"""a script to split a folder of flights into multiple flights based on the monotonic changes in altitude.
|
|
Searches a directory tree for csv files matching a pattern and splits each one out into several csvs, one per group of transects.
|
|
Needs course azimuths to work properly.
|
|
"""
|
|
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
import colorama
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
from src.gasflux import processing
|
|
|
|
# Initialise colorama once at import time; main() prints colorama.Fore colour codes in its status messages.
# NOTE(review): per the colorama docs this mainly matters on Windows terminals - confirm it is still needed.
colorama.init()
|
|
|
|
|
|
# Groups with fewer distinct transects than this are reported and not written.
_MIN_TRANSECTS = 3


def _report(colour, message):
    """Print ``message`` in ``colour``, then reset so the colour cannot bleed into later output."""
    print(f"{colour}{message}{colorama.Style.RESET_ALL}")


def _altitude_trend(avg_altitudes):
    """Classify per-transect mean altitudes as ``"ascending"`` or ``"descending"``.

    The trend is the sign of the summed transect-to-transect altitude changes.
    A flat or single-transect run counts as ``"descending"``.
    """
    # np.diff of zero or one value is empty, so the sum is 0 and the run falls through to "descending".
    return "ascending" if np.diff(avg_altitudes).sum() > 0 else "descending"


def _mean_altitudes(group_df):
    """Return the mean ``height_ato`` of each transect, ordered by ``transect_num``."""
    return group_df.groupby("transect_num")["height_ato"].mean().values


def _split_output_path(file_path, group, output_dir=None):
    """Build the csv path for one split of ``file_path``.

    The layout is ``<root>/<date>/<time>_<group>/<stem>_<group>.csv``, where
    ``<time>`` and ``<date>`` are the names of the source file's parent and
    grandparent folders. ``<root>`` is ``output_dir`` when given, otherwise a
    ``splits`` folder next to the date folder.
    """
    time_dir = file_path.parent
    date_dir = time_dir.parent
    # .parent never raises on shallow paths (indexing .parents does), so a file
    # sitting directly in the search directory still gets a usable path.
    root = Path(output_dir) if output_dir else date_dir.parent / "splits"
    return root / date_dir.name / f"{time_dir.name}_{group}" / f"{file_path.stem}_{group}.csv"


def main(target_dir, search_string, filter_mask, output_dir=None):
    """Split every matching flight csv under ``target_dir`` into one csv per transect group.

    Each file is passed through ``processing.monotonic_transect_groups``, which
    is expected to return the frame with ``group`` and ``transect_num`` columns.
    When the altitude trend flips between consecutive groups, the last transect
    of the previous group is prepended to the new group as transect 0. Groups
    with fewer than three distinct transects are reported and not written.

    Args:
        target_dir: directory searched recursively for input csvs.
        search_string: glob pattern passed to ``Path.rglob``.
        filter_mask: name of a boolean column; rows where it is true are
            dropped before grouping. Ignored when the column is absent.
        output_dir: optional root for the written csvs. When omitted, output
            goes to a ``splits`` folder three levels above each input file.
    """
    # Snapshot the matches before writing anything, so splits written under
    # target_dir cannot be picked up by a still-running search. Each csv is
    # read only when its turn comes rather than holding every frame in memory.
    for file_path in sorted(Path(target_dir).rglob(search_string)):
        df = pd.read_csv(file_path)
        _report(colorama.Fore.WHITE, "----------------------------------------")
        if filter_mask in df.columns:  # check for boolean mask columns
            df = df[~df[filter_mask]].reset_index(drop=True)
        df, _ = processing.monotonic_transect_groups(df)

        last_transect = None
        last_group_trend = None
        for group in df["group"].unique():
            group_df = df[df["group"] == group]
            avg_altitudes = _mean_altitudes(group_df)
            current_group_trend = _altitude_trend(avg_altitudes)

            # add last transect from previous group if there's a change in trend
            if last_transect is not None and last_group_trend != current_group_trend:
                group_df = pd.concat([last_transect, group_df])
                avg_altitudes = _mean_altitudes(group_df)

            # check if the group is monotonic (strictly rising or strictly falling)
            steps = np.diff(avg_altitudes)
            if not (np.all(steps > 0) or np.all(steps < 0)):
                _report(colorama.Fore.YELLOW, f"group {group} is not monotonic - check the code!")
            formatted_avg_altitudes = ", ".join(f"{alt:.1f}" for alt in avg_altitudes)

            # Remember the highest-numbered transect for the next group, renumbered
            # to 0 - presumably so it sorts ahead of that group's own transects.
            last_transect = group_df[group_df["transect_num"] == group_df["transect_num"].max()].copy()
            last_transect.loc[:, "transect_num"] = 0
            last_group_trend = current_group_trend

            unique_transects = group_df["transect_num"].nunique()
            if unique_transects < _MIN_TRANSECTS:
                _report(
                    colorama.Fore.RED,
                    f"Not saving {group} from {file_path} - too few transects "
                    f"({unique_transects} transects at {formatted_avg_altitudes}m)",
                )
                continue

            output_path = _split_output_path(file_path, group, output_dir)
            output_path.parent.mkdir(parents=True, exist_ok=True)
            group_df.to_csv(output_path, index=False)
            _report(
                colorama.Fore.GREEN,
                f"wrote {unique_transects} monotonic transects at {formatted_avg_altitudes}m to \n"
                f"{'/'.join(output_path.parts[-5:])}",
            )
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: declare the options in one table, parse them,
    # and pass the parsed values straight through to main().
    option_table = {
        "--target-dir": {"help": "the directory to search for csvs", "default": "survey"},
        "--search-string": {"help": "the string to search for in the directory", "default": "*filtered.csv"},
        "--filter-mask": {
            "help": "name of a column with a boolean filter; TRUE means discard",
            "default": "filtered",
        },
        "--output-dir": {"help": "the metadirectory to save the date/time/csvs to", "required": False},
    }

    cli = argparse.ArgumentParser()
    for flag, settings in option_table.items():
        cli.add_argument(flag, **settings)

    options = cli.parse_args()
    main(
        target_dir=options.target_dir,
        search_string=options.search_string,
        filter_mask=options.filter_mask,
        output_dir=options.output_dir,
    )
|