Initial commit

This commit is contained in:
2026-04-10 16:46:45 +08:00
commit 4fd1b0a203
165 changed files with 25698 additions and 0 deletions

View File

@ -0,0 +1,147 @@
'''transform.py
TODO: Add MNF option
parser.add_argument("-t", help="Transform type", type = str)
'''
import argparse
import os
import pickle
import subprocess
from shutil import which

import numpy as np
import ray
from sklearn.decomposition import PCA

import hytools as ht
from hytools.io.envi import WriteENVI
def main():
    '''Fit a PCA on pixels pooled from the input images and export the result.

    This script exports PCA transformed images. A single image or a group
    of images can be provided as input. In the case of a group of images the
    PCA decomposition will be performed using sampled data pooled from all
    images. All images must be of the same format, either all ENVI or all
    NEON; the format is decided from the extension of the first image only
    ('.h5' is read as NEON, anything else as ENVI).

    Images can be optionally mosaicked to a GEOTIFF. Mosaicking is done using
    gdal_merge.py and therefore requires gdal to be installed. Mosaicking
    won't work properly on images with a rotation.

    One ray actor is created per input image, and ray is initialised with as
    many CPUs as there are images.
    '''
    parser = argparse.ArgumentParser(description = "Perform a PCA")
    # At least one image is required: the code below indexes args.images[0]
    # and sizes the ray cluster from the number of images.
    parser.add_argument('images',help="Input image pathnames", nargs='+')
    parser.add_argument('output_dir',help="Output directory", type = str)
    parser.add_argument("-comps", help="Number of components to export", type = int,required=False,default=10)
    # The value is multiplied directly by the pixel count in subsample(),
    # so it is a fraction, not a percentage.
    parser.add_argument("-sample", help="Fraction of data to subsample (0-1)", type = float,required=False,default=0.1)
    parser.add_argument("-merge", help="Use gdal_merge.py to mosaic PCA images", required=False, action='store_true')
    parser.add_argument("-inv", help="Apply inverse transform", required=False, action='store_true')
    args = parser.parse_args()

    if not args.output_dir.endswith("/"):
        args.output_dir += "/"

    # Restart ray so the CPU count matches this run's number of images.
    if ray.is_initialized():
        ray.shutdown()
    ray.init(num_cpus = len(args.images))

    hytool = ray.remote(ht.HyTools)
    actors = [hytool.remote() for _ in args.images]

    file_type = 'neon' if args.images[0].endswith('.h5') else 'envi'
    _ = ray.get([a.read_file.remote(image,file_type) for a,image in zip(actors,args.images)])

    # Sample data
    samples = ray.get([a.do.remote(subsample,args) for a in actors])

    # Center, scale and fit PCA transform
    X = np.concatenate(samples).astype(np.float32)
    x_mean = X.mean(axis=0)[np.newaxis,:]
    X -= x_mean
    x_std = X.std(axis=0,ddof=1)[np.newaxis,:]
    X /= x_std
    # Drop every sample whose row sum is NaN or infinite.
    row_sums = X.sum(axis=1)
    X = X[~np.isnan(row_sums) & ~np.isinf(row_sums),:]

    print('Performing PCA decomposition')
    pca = PCA(n_components=args.comps)
    pca.fit(X)

    # The fitted model and the scaling statistics travel to the actors
    # as attributes of the args namespace.
    args.pca_pkl = pickle.dumps(pca)
    args.x_mean = x_mean
    args.x_std = x_std

    # Apply transform and export
    _ = ray.get([a.do.remote(apply_transform,args) for a in actors])

    if args.merge and len(args.images) > 1:
        gdal_merge = which('gdal_merge.py')
        if gdal_merge is not None:
            print('Mosaicking flightlines')
            base_names = ray.get([a.do.remote(lambda x : x.base_name) for a in actors])
            # These names must mirror the ones apply_transform() writes:
            # '<dir>/<base>_pca<NNN>' with an '_inv' suffix for the inverse
            # transform, where NNN is the zero-padded component count.
            suffix = '_inv' if args.inv else ''
            output_files = ['%s%s_pca%03d%s' % (args.output_dir,base,args.comps,suffix)
                            for base in base_names]
            command = [gdal_merge,'-o','%stransform_mosaic.tif' % args.output_dir] + output_files
            # An argument list (no shell) keeps paths with spaces intact.
            result = subprocess.run(command, check=False)
            if result.returncode != 0:
                print('gdal_merge.py failed with exit code %d' % result.returncode)
        else:
            print('gdal_merge.py not found, exiting.')
def subsample(hy_obj,args):
    '''Randomly sample pixels from one image and return their spectra.

    A random subset of the pixels flagged True in ``hy_obj.mask['no_data']``
    is chosen without replacement, stored on the image as the boolean mask
    ``hy_obj.mask['samples']`` and then read back, band by band, for every
    band that is not marked bad.

    Args:
        hy_obj: Image object providing ``file_name``, ``lines``, ``columns``,
            ``mask``, ``create_bad_bands()``, ``bad_bands`` and ``get_band()``.
        args: Namespace whose ``sample`` attribute is the fraction of
            candidate pixels to keep (it is multiplied directly by the
            candidate count and truncated to an int).

    Returns:
        The transposed stack of the per-band ``get_band`` results, presumably
        shaped (sampled pixels, good bands) - confirm against
        ``get_band``'s return shape.
    '''
    print("Sampling %s" % os.path.basename(hy_obj.file_name))

    # (row, column) pairs of every pixel eligible for sampling.
    candidates = np.argwhere(hy_obj.mask['no_data'])
    n_keep = int(len(candidates)*args.sample)
    picked = candidates[np.random.choice(len(candidates),n_keep,replace = False)]

    sub_samples = np.zeros((hy_obj.lines,hy_obj.columns),dtype=bool)
    sub_samples[picked[:,0],picked[:,1]] = True
    hy_obj.mask['samples'] = sub_samples

    # Ranges flagged as bad bands; presumably wavelengths in nm - confirm.
    hy_obj.create_bad_bands([[300,400],[1300,1450],[1780,2000],[2450,2600]])

    # `not` rather than `~`: on a plain Python bool `~True` is -2, which is
    # truthy, so bitwise inversion would let bad bands through.
    X = [hy_obj.get_band(band_num,mask='samples')
         for band_num,is_bad in enumerate(hy_obj.bad_bands)
         if not is_bad]
    return np.array(X).T
def apply_transform(hy_obj,args):
    '''Push one image through the fitted PCA and write the result with WriteENVI.

    The image is processed in 500 x 500 chunks. Each chunk is restricted to
    the bands not flagged in ``hy_obj.bad_bands``, standardised with
    ``args.x_mean`` / ``args.x_std`` and projected onto the PCA components.
    With ``args.inv`` set, the components are projected back and
    un-standardised, so the output has one band per good input band;
    otherwise the output has ``pca.n_components`` bands.

    Args:
        hy_obj: Image object providing ``base_name``, ``get_header()``,
            ``bad_bands``, ``wavelengths``, ``fwhm``, ``iterate()`` and
            ``no_data``.
        args: Namespace carrying ``pca_pkl`` (pickled PCA model),
            ``x_mean``, ``x_std``, ``output_dir`` and ``inv``.
    '''
    print("Exporting %s PCA" % hy_obj.base_name)
    # The pickle is produced by main() in the same run, not external input.
    model = pickle.loads(args.pca_pkl)

    header = hy_obj.get_header()
    keep = ~hy_obj.bad_bands

    # Start from the inverse-transform layout (one output band per good
    # input band); it is overridden below for plain component export.
    destination = '%s/%s_pca%03d_inv' % (args.output_dir,hy_obj.base_name,model.n_components)
    header['bands'] = keep.sum()
    header['wavelength'] = hy_obj.wavelengths[keep]
    header['fwhm'] = hy_obj.fwhm[keep]
    # 4 is presumably the ENVI code for 32-bit float, matching the float32
    # cast used below - confirm against the ENVI header specification.
    header['data type'] = 4
    header['data ignore value'] = 0

    if not args.inv:
        # Component images carry no spectral metadata.
        destination = '%s/%s_pca%03d' % (args.output_dir,hy_obj.base_name,model.n_components)
        header['bands'] = model.n_components
        header['wavelength'] = []
        header['fwhm'] = []

    writer = WriteENVI(destination,header)
    chunks = hy_obj.iterate(by = 'chunk',chunk_size = (500,500))

    while not chunks.complete:
        block = chunks.read_next()
        n_rows, n_cols = block.shape[0], block.shape[1]

        # Flatten the chunk to (pixels, good bands) and standardise it.
        pixels = block[:,:,keep].astype(np.float32)
        pixels = pixels.reshape((n_rows*n_cols,pixels.shape[2]))
        pixels -= args.x_mean
        pixels /= args.x_std
        # Non-finite values are zeroed before they reach the model.
        invalid = np.isnan(pixels) | np.isinf(pixels)
        pixels[invalid] = 0

        result = model.transform(pixels)
        if args.inv:
            # Back-project and undo the standardisation.
            result = model.inverse_transform(result)
            result *= args.x_std
            result += args.x_mean

        result = result.reshape((n_rows,n_cols,header['bands']))
        # Pixels whose first band equals the no-data value are written as 0,
        # the 'data ignore value' declared in the header.
        result[block[:,:,0] == hy_obj.no_data] = 0
        writer.write_chunk(result,
                           chunks.current_line,
                           chunks.current_column)
# Run the command-line workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()