'''transform.py

Export PCA transformed (or PCA-reconstructed) hyperspectral images.

TODO: Add MNF option
    parser.add_argument("-t", help="Transform type", type = str)

'''
import argparse
import os
import pickle
import subprocess
from shutil import which

import numpy as np
import ray
from sklearn.decomposition import PCA

import hytools as ht
from hytools.io.envi import WriteENVI


def _output_name(output_dir, base_name, n_components, inverse):
    '''Return the pathname apply_transform() writes for one image.

    Kept in a single place so that the mosaicking step in main() looks for
    exactly the files that apply_transform() produced.

    Args:
        output_dir: Output directory pathname.
        base_name: Image base name (``hy_obj.base_name``).
        n_components: Number of PCA components, zero padded to 3 digits.
        inverse: True when the inverse transformed image is exported, which
            adds an ``_inv`` suffix.

    Returns:
        Output image pathname as a string.
    '''
    suffix = '_inv' if inverse else ''
    file_name = '%s_pca%03d%s' % (base_name, n_components, suffix)
    return os.path.join(output_dir, file_name)


def main():
    '''
    This script exports PCA transformed images. A single image or a group of
    images can be provided as input. In the case of a group of images the PCA
    decomposition will be performed using sampled data pooled from all images.

    All images must be of the same format, either all ENVI or all NEON.

    Images can be optionally mosaicked to a GEOTIFF. Mosaicking is done using
    gdal_merge.py and therefore requires gdal to be installed. Mosaicking
    won't work properly on images with a rotation.
    '''
    parser = argparse.ArgumentParser(description="Perform a PCA")
    parser.add_argument('images', help="Input image pathnames", nargs='*')
    parser.add_argument('output_dir', help="Output directory", type=str)
    parser.add_argument("-comps", help="Number of components to export",
                        type=int, required=False, default=10)
    # The value is multiplied directly with the pixel count in subsample(),
    # so it is a fraction (0.1 == 10 % of the pixels), not a percentage.
    parser.add_argument("-sample",
                        help="Fraction of data to subsample (0-1)",
                        type=float, required=False, default=0.1)
    parser.add_argument("-merge",
                        help="Use gdal_merge.py to mosaic PCA images",
                        required=False, action='store_true')
    parser.add_argument("-inv", help="Apply inverse transform",
                        required=False, action='store_true')
    args = parser.parse_args()

    # nargs='*' accepts an empty list; fail with a usage message instead of
    # an IndexError further down.
    if not args.images:
        parser.error("at least one input image is required")

    if not args.output_dir.endswith("/"):
        args.output_dir += "/"
    os.makedirs(args.output_dir, exist_ok=True)

    if ray.is_initialized():
        ray.shutdown()
    ray.init(num_cpus=len(args.images))

    # One remote HyTools actor per input image.
    hytool = ray.remote(ht.HyTools)
    actors = [hytool.remote() for image in args.images]

    # The file type is decided from the first image only; all images are
    # expected to share the same format.
    file_type = 'neon' if args.images[0].endswith('.h5') else 'envi'

    _ = ray.get([a.read_file.remote(image, file_type)
                 for a, image in zip(actors, args.images)])

    # Sample data
    samples = ray.get([a.do.remote(subsample, args) for a in actors])

    # Center, scale and fit PCA transform
    X = np.concatenate(samples).astype(np.float32)
    x_mean = X.mean(axis=0)[np.newaxis, :]
    X -= x_mean
    x_std = X.std(axis=0, ddof=1)[np.newaxis, :]
    X /= x_std
    # Drop every sample whose row contains a NaN or an infinite value.
    X = X[np.isfinite(X.sum(axis=1)), :]

    print('Performing PCA decomposition')
    pca = PCA(n_components=args.comps)
    pca.fit(X)

    # The fitted model and the scaling terms travel to the actors as extra
    # attributes on the argparse namespace.
    args.pca_pkl = pickle.dumps(pca)
    args.x_mean = x_mean
    args.x_std = x_std

    # Apply transform and export
    _ = ray.get([a.do.remote(apply_transform, args) for a in actors])

    if args.merge and len(args.images) > 1:
        merge_tool = which('gdal_merge.py')
        if merge_tool is not None:
            print('Mosaicking flightlines')
            base_names = ray.get([a.do.remote(lambda x: x.base_name)
                                  for a in actors])
            output_files = [_output_name(args.output_dir, name,
                                         args.comps, args.inv)
                            for name in base_names]
            mosaic_name = os.path.join(args.output_dir,
                                       'transform_mosaic.tif')
            # An argument list (no shell) keeps pathnames containing spaces
            # or shell metacharacters intact.
            command = [merge_tool, '-o', mosaic_name] + output_files
            result = subprocess.run(command, check=False)
            if result.returncode != 0:
                print('gdal_merge.py failed with exit code %s'
                      % result.returncode)
        else:
            print('gdal_merge.py not found, exiting.')


def subsample(hy_obj, args):
    '''Randomly sample pixels from one image for fitting the PCA.

    Side effects on ``hy_obj``: stores the sampling mask in
    ``hy_obj.mask['samples']`` and calls ``hy_obj.create_bad_bands``;
    apply_transform() later reads ``hy_obj.bad_bands``.

    Args:
        hy_obj: HyTools object with an image already loaded.
        args: Parsed command line arguments; ``args.sample`` is the fraction
            of candidate pixels to draw.

    Returns:
        Array with one row per sampled pixel and one column per band that is
        not flagged in ``hy_obj.bad_bands``.
    '''
    print("Sampling %s" % os.path.basename(hy_obj.file_name))

    # Candidate pixels are those where mask['no_data'] is True
    # (presumably the pixels holding valid data -- confirm against hytools).
    sub_samples = np.zeros((hy_obj.lines, hy_obj.columns), dtype=bool)
    idx = np.array(np.where(hy_obj.mask['no_data'])).T
    n_samples = int(len(idx) * args.sample)
    idx_rand = idx[np.random.choice(len(idx), n_samples, replace=False)].T
    sub_samples[idx_rand[0], idx_rand[1]] = True
    hy_obj.mask['samples'] = sub_samples

    # Ranges excluded from the analysis (presumably wavelengths in nm --
    # confirm against hytools' create_bad_bands).
    bad_ranges = [[300, 400], [1300, 1450], [1780, 2000], [2450, 2600]]
    hy_obj.create_bad_bands(bad_ranges)

    # 'not band' is correct for both NumPy and plain Python booleans,
    # whereas '~band' is truthy for every Python bool.
    X = [hy_obj.get_band(band_num, mask='samples')
         for band_num, band in enumerate(hy_obj.bad_bands)
         if not band]
    return np.array(X).T


def apply_transform(hy_obj, args):
    '''Apply the fitted PCA to one image and write the result as ENVI.

    Without ``args.inv`` the image is written with ``pca.n_components``
    bands. With ``args.inv`` the scores are projected back with
    ``inverse_transform`` and un-scaled, and the image is written with one
    band per band not flagged in ``hy_obj.bad_bands``.

    Relies on ``hy_obj.bad_bands``, which subsample() sets up through
    ``create_bad_bands``.

    Args:
        hy_obj: HyTools object with an image already loaded.
        args: Parsed command line arguments extended by main() with
            ``pca_pkl`` (pickled PCA), ``x_mean`` and ``x_std``.
    '''
    print("Exporting %s PCA" % hy_obj.base_name)

    # The pickle is created by main() in this same run, not read from an
    # external source.
    pca = pickle.loads(args.pca_pkl)
    good_bands = ~hy_obj.bad_bands

    if args.inv:
        n_out = int(good_bands.sum())
        wavelengths = hy_obj.wavelengths[good_bands]
        fwhm = hy_obj.fwhm[good_bands]
    else:
        n_out = pca.n_components
        wavelengths = []
        fwhm = []

    output_name = _output_name(args.output_dir, hy_obj.base_name,
                               pca.n_components, args.inv)

    header_dict = hy_obj.get_header()
    header_dict['bands'] = n_out
    header_dict['wavelength'] = wavelengths
    header_dict['fwhm'] = fwhm
    header_dict['data type'] = 4
    header_dict['data ignore value'] = 0

    writer = WriteENVI(output_name, header_dict)
    iterator = hy_obj.iterate(by='chunk', chunk_size=(500, 500))

    while not iterator.complete:
        chunk = iterator.read_next()
        n_lines, n_columns = chunk.shape[0], chunk.shape[1]

        # Flatten the chunk to (pixels, bands) and scale it exactly like the
        # data the PCA was fitted on.
        X_chunk = chunk[:, :, good_bands].astype(np.float32)
        X_chunk = X_chunk.reshape((n_lines * n_columns, X_chunk.shape[2]))
        X_chunk -= args.x_mean
        X_chunk /= args.x_std
        X_chunk[~np.isfinite(X_chunk)] = 0

        pca_chunk = pca.transform(X_chunk)
        if args.inv:
            pca_chunk = pca.inverse_transform(pca_chunk)
            pca_chunk *= args.x_std
            pca_chunk += args.x_mean

        pca_chunk = pca_chunk.reshape((n_lines, n_columns, n_out))
        # Pixels flagged as no data in the first input band are written as
        # 0, the 'data ignore value' declared in the header.
        pca_chunk[chunk[:, :, 0] == hy_obj.no_data] = 0
        writer.write_chunk(pca_chunk,
                           iterator.current_line,
                           iterator.current_column)


if __name__ == "__main__":
    main()