Initial commit: GasFlux project with core processing pipelines

This commit is contained in:
2026-01-05 09:33:27 +08:00
commit f085d7c5fe
50 changed files with 10634 additions and 0 deletions

View File

@ -0,0 +1,108 @@
"""a script to split a folder of flights into multiple flights based on the monotonic changes in altitude.
Takes a csv file as an input and will split it out into several csvs.
Needs course azimuths to work properly.
"""
import argparse
from pathlib import Path
import colorama
import numpy as np
import pandas as pd
from src.gasflux import processing
colorama.init()  # enable ANSI colour output (needed for Windows terminals)
def main(target_dir, search_string, filter_mask, output_dir=None):
    """Split survey CSVs into one CSV per monotonic-altitude transect group.

    Recursively finds CSVs under ``target_dir`` matching ``search_string``,
    drops rows flagged True in the boolean ``filter_mask`` column (if present),
    groups rows via ``processing.monotonic_transect_groups``, and writes each
    group with at least three transects to its own CSV.  When the altitude
    trend flips between consecutive groups, the final transect of the previous
    group is prepended (renumbered 0) so the new group starts from the altitude
    where the last one ended.

    Args:
        target_dir: directory searched recursively for input CSVs.
        search_string: glob pattern for input files (e.g. ``*filtered.csv``).
        filter_mask: name of a boolean column; rows where it is True are discarded.
        output_dir: optional root directory for outputs; when omitted, outputs go
            to a ``splits`` tree three levels above each input file
            (assumes a time -> date -> analysis folder layout — TODO confirm).
    """
    frames = {path: pd.read_csv(path) for path in Path(target_dir).rglob(search_string)}
    for file_path, df in frames.items():
        print(colorama.Fore.WHITE + "----------------------------------------")
        if filter_mask in df.columns:  # check for boolean mask columns
            df = df[~df[filter_mask]].reset_index(drop=True)
        df, _ = processing.monotonic_transect_groups(df)
        last_transect = None
        last_group_trend = None
        for group in df["group"].unique():
            group_df = df[df["group"] == group]
            avg_altitudes = group_df.groupby("transect_num")["height_ato"].mean().values
            # Mean per-transect altitude change; the sum of consecutive
            # differences telescopes to (last - first).  Only the sign is used.
            avg_change = (avg_altitudes[-1] - avg_altitudes[0]) / len(avg_altitudes) if len(avg_altitudes) > 1 else 0
            current_group_trend = "ascending" if avg_change > 0 else "descending"
            # On a trend change, carry the previous group's final transect over.
            if last_group_trend and last_group_trend != current_group_trend and last_transect is not None:
                group_df = pd.concat([last_transect, group_df])
                avg_altitudes = group_df.groupby("transect_num")["height_ato"].mean().values
            # Sanity check: per-transect mean altitudes should be strictly monotonic.
            is_monotonic = np.all(np.diff(avg_altitudes) > 0) or np.all(np.diff(avg_altitudes) < 0)
            if not is_monotonic:  # exception
                print(f"group {group} is not monotonic - check the code!")
            formatted_avg_altitudes = ", ".join(f"{alt:.1f}" for alt in avg_altitudes)
            # Remember the highest-numbered transect; renumber it 0 so it sorts
            # first when prepended to the next group.
            last_transect = group_df[group_df["transect_num"] == group_df["transect_num"].max()].copy()
            last_transect.loc[:, "transect_num"] = 0
            last_group_trend = current_group_trend
            unique_transects = group_df["transect_num"].nunique()
            if unique_transects < 3:
                print(
                    colorama.Fore.RED + f"Not saving {group} from {file_path} - too few transects"
                    f"({unique_transects} transects at {formatted_avg_altitudes}m)"
                )
            else:
                output_path = _output_path(Path(file_path), group, output_dir)
                output_path.parent.mkdir(parents=True, exist_ok=True)
                group_df.to_csv(output_path, index=False)
                print(
                    colorama.Fore.GREEN
                    + f'wrote {unique_transects} monotonic transects at {formatted_avg_altitudes}m to \n'
                    f'{"/".join(output_path.parts[-5:])}'
                )


def _output_path(file_path, group, output_dir):
    """Build the destination CSV path for one group of one input file."""
    if output_dir:
        return (
            Path(output_dir)
            / f"{file_path.parents[1].name}"
            / f"{file_path.parent.name}_{group}"
            / f"{file_path.stem}_{group}.csv"
        )
    # assuming in date and time folder: time -> date -> analysis
    return (
        file_path.parent.parent.parent
        / "splits"
        / file_path.parent.parent.name  # date
        / f"{file_path.parent.name}_{group}"  # time + group
        / f"{file_path.stem}_{group}.csv"
    )
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--target-dir", help="the directory to search for csvs", default="survey")
parser.add_argument("--search-string", help="the string to search for in the directory", default="*filtered.csv")
parser.add_argument(
"--filter-mask", help="name of a column with a boolean filter; TRUE means discard", default="filtered"
)
parser.add_argument("--output-dir", help="the metadirectory to save the date/time/csvs to", required=False)
args = parser.parse_args()
main(
args.target_dir,
args.search_string,
args.filter_mask,
args.output_dir,
)