'''transform.py

TODO: Add MNF option, e.g.:
    parser.add_argument("-t", help="Transform type", type = str)
'''
|
|
import argparse
|
|
import pickle
|
|
import os
|
|
from shutil import which
|
|
import ray
|
|
import numpy as np
|
|
from sklearn.decomposition import PCA
|
|
import hytools as ht
|
|
from hytools.io.envi import WriteENVI
|
|
|
|
def main():
    '''
    This script exports PCA transformed images. A single image or a group
    of images can be provided as input. In the case of a group of images the PCA decomposition will be performed
    using sampled data pooled from all images. All images must be of the same format, either all ENVI or all NEON.
    Images can be optionally mosaicked to a GEOTIFF. Mosaicking is done using gdal_merge.py and therefore
    requires gdal to be installed. Mosiacking won't work properly on images with a rotation.
    '''
    parser = argparse.ArgumentParser(description = "Perform a PCA")
    parser.add_argument('images',help="Input image pathnames", nargs='*')
    parser.add_argument('output_dir',help="Output directory", type = str)
    parser.add_argument("-comps", help="Number of components to export", type = int,required=False,default=10)
    parser.add_argument("-sample", help="Percent of data to subsample", type = float,required=False,default=0.1)
    parser.add_argument("-merge", help="Use gdal_merge.py to mosaic PCA images", required=False, action='store_true')
    parser.add_argument("-inv", help="Apply inverse transform", required=False, action='store_true')

    args = parser.parse_args()

    # nargs='*' permits an empty image list, which would crash below
    # (args.images[0]) — fail early with a usage error instead.
    if not args.images:
        parser.error("at least one input image is required")

    if not args.output_dir.endswith("/"):
        args.output_dir+="/"

    # Restart ray with one CPU per input image; each image gets its own actor.
    if ray.is_initialized():
        ray.shutdown()
    ray.init(num_cpus = len(args.images))

    hytool = ray.remote(ht.HyTools)
    actors = [hytool.remote() for image in args.images]

    # Infer format from the first image; all images must share one format.
    if args.images[0].endswith('.h5'):
        file_type = 'neon'
    else:
        file_type = 'envi'

    _ = ray.get([a.read_file.remote(image,file_type) for a,image in zip(actors,args.images)])

    # Sample data
    samples = ray.get([a.do.remote(subsample,args) for a in actors])

    # Center, scale and fit PCA transform on the pooled samples.
    X = np.concatenate(samples).astype(np.float32)
    x_mean = X.mean(axis=0)[np.newaxis,:]
    X -=x_mean
    x_std = X.std(axis=0,ddof=1)[np.newaxis,:]
    X /=x_std
    # Drop rows containing NaN/Inf produced by the scaling step.
    X = X[~np.isnan(X.sum(axis=1)) & ~np.isinf(X.sum(axis=1)),:]

    print('Performing PCA decomposition')
    pca = PCA(n_components=args.comps)
    pca.fit(X)
    pca_pkl = pickle.dumps(pca)

    # Ship the fitted model and scaling stats to the workers via args.
    args.pca_pkl = pca_pkl
    args.x_mean = x_mean
    args.x_std = x_std

    #Apply tranform and export
    _ = ray.get([a.do.remote(apply_transform,args) for a in actors])

    if args.merge and len(args.images) > 1:
        if which('gdal_merge.py') is not None:
            print('Mosaicking flightlines')
            # Filenames must match what apply_transform wrote:
            # '<base>_pca%03d' plus an '_inv' suffix for inverse transforms.
            # (Previously only '<base>_pca' was used, so merging referenced
            # files that were never created.)
            suffix = '_pca%03d' % args.comps
            if args.inv:
                suffix += '_inv'
            base_names = ray.get([a.do.remote(lambda x : x.base_name) for a in actors])
            output_files = ["%s%s%s" % (args.output_dir,base,suffix) for base in base_names]
            string = ['gdal_merge.py','-o', '%stransform_mosaic.tif' % args.output_dir] + output_files
            os.system(' '.join(string))
        else:
            print('gdal_merge.py not found, exiting.')
|
def subsample(hy_obj,args):
    '''Randomly sample pixel spectra from an image, excluding bad bands.

    Args:
        hy_obj: HyTools image object (file already read).
        args: Parsed arguments; args.sample is the fraction of unmasked
            pixels to draw (e.g. 0.1 = 10%).

    Returns:
        np.ndarray of shape (n_samples, n_good_bands), one row per sampled
        pixel. Also stores the sample mask as hy_obj.mask['samples'].
    '''
    print("Sampling %s" % os.path.basename(hy_obj.file_name))

    # Select 'sample' fraction of non-masked pixels for modeling.
    sub_samples = np.zeros((hy_obj.lines,hy_obj.columns),dtype=bool)
    idx = np.array(np.where(hy_obj.mask['no_data'])).T
    n_samples = int(len(idx)*args.sample)
    idx_rand = idx[np.random.choice(len(idx), n_samples, replace = False)].T
    sub_samples[idx_rand[0],idx_rand[1]] = True
    hy_obj.mask['samples'] = sub_samples

    # Exclude noisy / water-absorption wavelength regions (nm).
    hy_obj.create_bad_bands([[300,400],[1300,1450],[1780,2000],[2450,2600]])

    # 'not band' is correct for both numpy bools and Python bools;
    # the previous '~band' is truthy for BOTH True and False when band
    # is a plain Python bool (~True == -2).
    X = [hy_obj.get_band(band_num,mask='samples')
         for band_num,band in enumerate(hy_obj.bad_bands) if not band]
    return np.array(X).T
|
def apply_transform(hy_obj,args):
    '''Apply the fitted PCA to one image, chunk by chunk, and export as ENVI.

    Expects args to carry: pca_pkl (pickled sklearn PCA), x_mean / x_std
    (1 x n_good_bands arrays used to center and scale spectra before the
    transform), inv (bool) and output_dir. With args.inv the PCA scores are
    immediately projected back to (re-scaled) band space and one band per
    good input band is written; otherwise the n_components scores are
    written. Output name: <output_dir>/<base>_pca%03d[_inv].
    '''
    print("Exporting %s PCA" % hy_obj.base_name)
    # pca_pkl is produced locally in main(), so unpickling it is safe here;
    # never pass untrusted data through this path.
    pca = pickle.loads(args.pca_pkl)
    # Default to the inverse-transform name; overwritten below when not inv.
    output_name = '%s/%s_pca%03d_inv' % (args.output_dir,hy_obj.base_name,pca.n_components)
    header_dict = hy_obj.get_header()
    # Inverse transform restores the good-band spectra, so the header keeps
    # per-band wavelength/fwhm metadata (bad bands dropped).
    header_dict['bands'] = (~hy_obj.bad_bands).sum()
    header_dict['wavelength'] = hy_obj.wavelengths[~hy_obj.bad_bands]
    header_dict['fwhm'] = hy_obj.fwhm[~hy_obj.bad_bands]
    header_dict['data type'] = 4  # ENVI code 4 = 32-bit float
    header_dict['data ignore value'] = 0
    if not args.inv:
        # Forward transform: output bands are PCA components, which have no
        # physical wavelength, so wavelength/fwhm lists are emptied.
        header_dict['bands'] = pca.n_components
        output_name = '%s/%s_pca%03d' % (args.output_dir,hy_obj.base_name,pca.n_components)
        header_dict['wavelength'] = []
        header_dict['fwhm'] = []

    writer = WriteENVI(output_name,header_dict)
    iterator = hy_obj.iterate(by = 'chunk',chunk_size = (500,500))

    while not iterator.complete:
        chunk = iterator.read_next()

        # Flatten chunk to (pixels x good_bands), then center/scale with the
        # statistics computed from the pooled training sample in main().
        X_chunk = chunk[:,:,~hy_obj.bad_bands].astype(np.float32)
        X_chunk = X_chunk.reshape((X_chunk.shape[0]*X_chunk.shape[1],X_chunk.shape[2]))
        X_chunk -=args.x_mean
        X_chunk /=args.x_std
        # Zero out non-finite pixels so PCA does not propagate NaN/Inf.
        X_chunk[np.isnan(X_chunk) | np.isinf(X_chunk)] = 0
        pca_chunk= pca.transform(X_chunk)
        if args.inv:
            # Round-trip through PCA space, then undo the scaling so values
            # return to the original units.
            pca_chunk = pca.inverse_transform(pca_chunk)
            pca_chunk *=args.x_std
            pca_chunk +=args.x_mean
        pca_chunk = pca_chunk.reshape((chunk.shape[0],chunk.shape[1],header_dict['bands']))
        # Re-apply the no-data mask (matches 'data ignore value' above).
        pca_chunk[chunk[:,:,0] == hy_obj.no_data] =0

        # current_line/current_column refer to the chunk just read, so the
        # write must stay inside this loop iteration, after read_next().
        writer.write_chunk(pca_chunk,
                           iterator.current_line,
                           iterator.current_column)
|
# Script entry point.
if __name__ == "__main__":
    main()
|