Initial commit

2026-04-10 16:46:45 +08:00
commit 4fd1b0a203
165 changed files with 25698 additions and 0 deletions
--- a/Flexbrdf/scripts/transform.py
+++ b/Flexbrdf/scripts/transform.py
@ -0,0 +1,147 @@
+'''transform.py
+
+TODO: Add MNF option
+    parser.add_argument("-t", help="Transform type", type = str)
+
+
+'''
+import argparse
+import pickle
+import os
+from shutil import which
+import ray
+import numpy as np
+from sklearn.decomposition import PCA
+import hytools as ht
+from hytools.io.envi import WriteENVI
+
+def main():
+    '''
+    This script exports PCA transformed images. A single image or a group
+    of images can be provided as input. In the case of a group of images the PCA decomposition will be performed
+    using sampled data pooled from all images. All images must be of the same format, either all ENVI or all NEON.
+    Images can be optionally mosaicked to a GEOTIFF. Mosaicking is done using gdal_merge.py and therefore
+    requires gdal to be installed. Mosiacking won't work properly on images with a rotation.
+    '''
+    parser = argparse.ArgumentParser(description = "Perform a PCA")
+    parser.add_argument('images',help="Input image pathnames", nargs='*')
+    parser.add_argument('output_dir',help="Output directory", type = str)
+    parser.add_argument("-comps", help="Number of components to export", type = int,required=False,default=10)
+    parser.add_argument("-sample", help="Percent of data to subsample", type = float,required=False,default=0.1)
+    parser.add_argument("-merge", help="Use gdal_merge.py to mosaic PCA images", required=False, action='store_true')
+    parser.add_argument("-inv", help="Apply inverse transform", required=False, action='store_true')
+
+    args = parser.parse_args()
+
+    if not args.output_dir.endswith("/"):
+        args.output_dir+="/"
+
+    if ray.is_initialized():
+        ray.shutdown()
+    ray.init(num_cpus = len(args.images))
+
+    hytool = ray.remote(ht.HyTools)
+    actors = [hytool.remote() for image in args.images]
+
+    if args.images[0].endswith('.h5'):
+        file_type = 'neon'
+    else:
+        file_type = 'envi'
+
+    _ = ray.get([a.read_file.remote(image,file_type) for a,image in zip(actors,args.images)])
+
+    # Sample data
+    samples  = ray.get([a.do.remote(subsample,args) for a in actors])
+
+    # Center, scale and fit PCA transform
+    X = np.concatenate(samples).astype(np.float32)
+    x_mean = X.mean(axis=0)[np.newaxis,:]
+    X -=x_mean
+    x_std = X.std(axis=0,ddof=1)[np.newaxis,:]
+    X /=x_std
+    X = X[~np.isnan(X.sum(axis=1)) & ~np.isinf(X.sum(axis=1)),:]
+
+    print('Performing PCA decomposition')
+    pca = PCA(n_components=args.comps)
+    pca.fit(X)
+    pca_pkl = pickle.dumps(pca)
+
+    args.pca_pkl = pca_pkl
+    args.x_mean = x_mean
+    args.x_std = x_std
+
+    #Apply tranform and export
+    _  = ray.get([a.do.remote(apply_transform,args) for a in actors])
+
+    if args.merge and len(args.images) > 1:
+        if which('gdal_merge.py') is not None:
+            print('Mosaicking flightlines')
+            output_files = ["%s%s_pca" %(args.output_dir,image) for image in \
+                            ray.get([a.do.remote(lambda x : x.base_name) for a in actors])]
+            string = ['gdal_merge.py','-o', '%stransform_mosaic.tif' % args.output_dir] + output_files
+            os.system(' '.join(string))
+        else:
+            print('gdal_merge.py not found, exiting.')
+
+def subsample(hy_obj,args):
+
+    print("Sampling %s" % os.path.basename(hy_obj.file_name))
+
+    # Select 'sample_perc' % of pixels for modeling
+    # This can probably be written more concisely
+    sub_samples = np.zeros((hy_obj.lines,hy_obj.columns)).astype(bool)
+    idx = np.array(np.where(hy_obj.mask['no_data'])).T
+    idxRand= idx[np.random.choice(range(len(idx)),int(len(idx)*args.sample), replace = False)].T
+    sub_samples[idxRand[0],idxRand[1]] = True
+    hy_obj.mask['samples'] = sub_samples
+
+    X = []
+
+    hy_obj.create_bad_bands([[300,400],[1300,1450],[1780,2000],[2450,2600]])
+    for band_num,band in enumerate(hy_obj.bad_bands):
+        if ~band:
+            X.append(hy_obj.get_band(band_num,mask='samples'))
+    return  np.array(X).T
+
+def apply_transform(hy_obj,args):
+
+    print("Exporting %s PCA" % hy_obj.base_name)
+    pca = pickle.loads(args.pca_pkl)
+    output_name = '%s/%s_pca%03d_inv' % (args.output_dir,hy_obj.base_name,pca.n_components)
+    header_dict = hy_obj.get_header()
+    header_dict['bands'] = (~hy_obj.bad_bands).sum()
+    header_dict['wavelength'] = hy_obj.wavelengths[~hy_obj.bad_bands]
+    header_dict['fwhm'] = hy_obj.fwhm[~hy_obj.bad_bands]
+    header_dict['data type'] = 4
+    header_dict['data ignore value'] = 0
+    if not args.inv:
+        header_dict['bands'] = pca.n_components
+        output_name = '%s/%s_pca%03d' % (args.output_dir,hy_obj.base_name,pca.n_components)
+        header_dict['wavelength'] = []
+        header_dict['fwhm'] = []
+
+    writer = WriteENVI(output_name,header_dict)
+    iterator = hy_obj.iterate(by = 'chunk',chunk_size = (500,500))
+
+    while not iterator.complete:
+        chunk = iterator.read_next()
+
+        X_chunk = chunk[:,:,~hy_obj.bad_bands].astype(np.float32)
+        X_chunk = X_chunk.reshape((X_chunk.shape[0]*X_chunk.shape[1],X_chunk.shape[2]))
+        X_chunk -=args.x_mean
+        X_chunk /=args.x_std
+        X_chunk[np.isnan(X_chunk) | np.isinf(X_chunk)] = 0
+        pca_chunk=  pca.transform(X_chunk)
+        if args.inv:
+            pca_chunk = pca.inverse_transform(pca_chunk)
+            pca_chunk *=args.x_std
+            pca_chunk +=args.x_mean
+        pca_chunk = pca_chunk.reshape((chunk.shape[0],chunk.shape[1],header_dict['bands']))
+        pca_chunk[chunk[:,:,0] == hy_obj.no_data] =0
+
+        writer.write_chunk(pca_chunk,
+                           iterator.current_line,
+                           iterator.current_column)
+
+if __name__== "__main__":
+    main()