Initial commit: GasFlux project with core processing pipelines

This commit is contained in:
2026-01-05 09:33:27 +08:00
commit f085d7c5fe
50 changed files with 10634 additions and 0 deletions

View File

@ -0,0 +1,108 @@
"""a script to split a folder of flights into multiple flights based on the monotonic changes in altitude.
Takes a csv file as an input and will split it out into several csvs.
Needs course azimuths to work properly.
"""
import argparse
from pathlib import Path
import colorama
import numpy as np
import pandas as pd
from src.gasflux import processing
colorama.init()  # enable ANSI colour output (needed for Windows terminals)
def main(target_dir, search_string, filter_mask, output_dir=None):
    """Split survey CSVs into one CSV per monotonic-altitude transect group.

    Recursively finds CSVs under ``target_dir`` matching ``search_string``,
    drops rows flagged True in the boolean ``filter_mask`` column (if present),
    groups rows via ``processing.monotonic_transect_groups``, and writes each
    group with at least three transects to its own CSV.  When the altitude
    trend flips between consecutive groups, the final transect of the previous
    group is prepended (renumbered 0) so the new group starts from the altitude
    where the last one ended.

    Args:
        target_dir: directory searched recursively for input CSVs.
        search_string: glob pattern for input files (e.g. ``*filtered.csv``).
        filter_mask: name of a boolean column; rows where it is True are discarded.
        output_dir: optional root directory for outputs; when omitted, outputs go
            to a ``splits`` tree three levels above each input file
            (assumes a time -> date -> analysis folder layout — TODO confirm).
    """
    frames = {path: pd.read_csv(path) for path in Path(target_dir).rglob(search_string)}
    for file_path, df in frames.items():
        print(colorama.Fore.WHITE + "----------------------------------------")
        if filter_mask in df.columns:  # check for boolean mask columns
            df = df[~df[filter_mask]].reset_index(drop=True)
        df, _ = processing.monotonic_transect_groups(df)
        last_transect = None
        last_group_trend = None
        for group in df["group"].unique():
            group_df = df[df["group"] == group]
            avg_altitudes = group_df.groupby("transect_num")["height_ato"].mean().values
            # Mean per-transect altitude change; the sum of consecutive
            # differences telescopes to (last - first).  Only the sign is used.
            avg_change = (avg_altitudes[-1] - avg_altitudes[0]) / len(avg_altitudes) if len(avg_altitudes) > 1 else 0
            current_group_trend = "ascending" if avg_change > 0 else "descending"
            # On a trend change, carry the previous group's final transect over.
            if last_group_trend and last_group_trend != current_group_trend and last_transect is not None:
                group_df = pd.concat([last_transect, group_df])
                avg_altitudes = group_df.groupby("transect_num")["height_ato"].mean().values
            # Sanity check: per-transect mean altitudes should be strictly monotonic.
            is_monotonic = np.all(np.diff(avg_altitudes) > 0) or np.all(np.diff(avg_altitudes) < 0)
            if not is_monotonic:  # exception
                print(f"group {group} is not monotonic - check the code!")
            formatted_avg_altitudes = ", ".join(f"{alt:.1f}" for alt in avg_altitudes)
            # Remember the highest-numbered transect; renumber it 0 so it sorts
            # first when prepended to the next group.
            last_transect = group_df[group_df["transect_num"] == group_df["transect_num"].max()].copy()
            last_transect.loc[:, "transect_num"] = 0
            last_group_trend = current_group_trend
            unique_transects = group_df["transect_num"].nunique()
            if unique_transects < 3:
                print(
                    colorama.Fore.RED + f"Not saving {group} from {file_path} - too few transects"
                    f"({unique_transects} transects at {formatted_avg_altitudes}m)"
                )
            else:
                output_path = _output_path(Path(file_path), group, output_dir)
                output_path.parent.mkdir(parents=True, exist_ok=True)
                group_df.to_csv(output_path, index=False)
                print(
                    colorama.Fore.GREEN
                    + f'wrote {unique_transects} monotonic transects at {formatted_avg_altitudes}m to \n'
                    f'{"/".join(output_path.parts[-5:])}'
                )


def _output_path(file_path, group, output_dir):
    """Build the destination CSV path for one group of one input file."""
    if output_dir:
        return (
            Path(output_dir)
            / f"{file_path.parents[1].name}"
            / f"{file_path.parent.name}_{group}"
            / f"{file_path.stem}_{group}.csv"
        )
    # assuming in date and time folder: time -> date -> analysis
    return (
        file_path.parent.parent.parent
        / "splits"
        / file_path.parent.parent.name  # date
        / f"{file_path.parent.name}_{group}"  # time + group
        / f"{file_path.stem}_{group}.csv"
    )
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--target-dir", help="the directory to search for csvs", default="survey")
parser.add_argument("--search-string", help="the string to search for in the directory", default="*filtered.csv")
parser.add_argument(
"--filter-mask", help="name of a column with a boolean filter; TRUE means discard", default="filtered"
)
parser.add_argument("--output-dir", help="the metadirectory to save the date/time/csvs to", required=False)
args = parser.parse_args()
main(
args.target_dir,
args.search_string,
args.filter_mask,
args.output_dir,
)