# Source: BRDF/Flexbrdf/scripts/imagepoints_ext_glt_ht.py
# (listing exported 2026-04-10 16:46:45 +08:00 — 414 lines, 14 KiB, Python)
import os, sys
import numpy as np
import argparse
import pandas as pd
try:
from osgeo import gdal, osr, ogr
has_gdal=True
except ModuleNotFoundError:
print("No gdal installed")
has_gdal=False
import hytools as ht
from hytools.misc.point import local_point2spec, subset_band_list #*
#warnings.filterwarnings("ignore")
#np.seterr(divide='ignore', invalid='ignore')
def obs_point2spec(hyObj, img_row, img_col):
    """Build a per-point observation-geometry table.

    Samples six ancillary (OBS) layers at the given image coordinates and
    returns them alongside the coordinates themselves.

    Parameters
    ----------
    hyObj : HyTools file object
        Must provide ``get_anc(name)`` returning a 2-D array per layer.
    img_row, img_col : array-like of int
        Zero-based raw image row/column indices of the sample points.

    Returns
    -------
    pandas.DataFrame
        One row per point with columns img_row, img_col, sensor_az,
        sensor_zn, solar_az, solar_zn, slope, azimuth.
        NOTE(review): the 'azimuth' column is filled from the 'aspect'
        ancillary layer — confirm downstream consumers expect that name.
    """
    anc_layers = ('sensor_az', 'sensor_zn', 'solar_az', 'solar_zn', 'slope', 'aspect')
    sampled = [hyObj.get_anc(layer)[img_row, img_col] for layer in anc_layers]
    stacked = np.vstack([img_row, img_col] + sampled)
    return pd.DataFrame(
        stacked.T,
        columns=['img_row', 'img_col', 'sensor_az', 'sensor_zn',
                 'solar_az', 'solar_zn', 'slope', 'azimuth'])
def rasterize_polygon(hyObj, polygon_fn, key_id, use_glt_bool):
    """Rasterize a polygon vector file onto the image grid.

    Each feature is burned with a unique sequential integer ('tempFID',
    starting at 1); a lookup dictionary maps that integer (as a string)
    back to the feature's value in the user-chosen ID field.

    Parameters
    ----------
    hyObj : HyTools file object
        Supplies the output grid size, geotransform and projection
        (GLT versions when use_glt_bool is True).
    polygon_fn : str
        Vector file (shapefile/GeoJSON) readable by OGR.
    key_id : str
        Attribute-field name holding each polygon's unique ID.
    use_glt_bool : bool
        Rasterize on the GLT (orthorectified) grid instead of the raw grid.

    Returns
    -------
    tuple
        (gdal.Dataset, dict) on success, or (None, None) when key_id is
        not a field of the vector file.
    """
    source_ds = ogr.Open(polygon_fn)
    source_layer = source_ds.GetLayer()
    ldefn = source_layer.GetLayerDefn()
    field_list = [ldefn.GetFieldDefn(n).name for n in range(ldefn.GetFieldCount())]
    # (Removed debug `print(field_list[1])`, which raised IndexError on
    # vector files with a single attribute field.)
    if key_id not in field_list:
        print('Field "', key_id, '" is not in the shapefile!')
        return (None, None)
    # Work on an in-memory copy so the source file is never modified.
    tmp_mem_driver = ogr.GetDriverByName('MEMORY')
    dest = tmp_mem_driver.CreateDataSource('tempData')
    mem_lyr = dest.CopyLayer(source_layer, 'newlayer', ['OVERWRITE=YES'])
    feature_count = mem_lyr.GetFeatureCount()
    # Assign each feature a sequential integer to burn into the raster.
    mem_lyr.CreateField(ogr.FieldDefn('tempFID', ogr.OFTInteger))
    lookup_dict = {}
    for i, feature in enumerate(mem_lyr):
        feature.SetField('tempFID', i + 1)  # key step 1
        lookup_dict[str(i + 1)] = feature.GetField(key_id)
        mem_lyr.SetFeature(feature)  # key step 2: persist the field value
    if use_glt_bool:
        out_col, out_row = hyObj.columns_glt, hyObj.lines_glt
        out_transform, out_proj = hyObj.glt_transform, hyObj.glt_projection
    else:
        out_col, out_row = hyObj.columns, hyObj.lines
        out_transform, out_proj = hyObj.transform, hyObj.projection
    # Pick the smallest raster type that holds every tempFID while leaving
    # room for a distinct nodata value (255 is reserved in the Byte case).
    if feature_count < 255:
        gdal_dtype, nodata_val = gdal.GDT_Byte, 255
    elif feature_count < 32767:
        # Also covers feature_count == 255, which the previous chain
        # accidentally pushed all the way to Int32.
        gdal_dtype, nodata_val = gdal.GDT_Int16, -9999
    else:
        gdal_dtype, nodata_val = gdal.GDT_Int32, -9999
    target_ds = gdal.GetDriverByName('MEM').Create('', out_col, out_row, 1, gdal_dtype)
    target_ds.SetGeoTransform(out_transform)
    target_ds.SetProjection(out_proj)
    band = target_ds.GetRasterBand(1)
    band.SetNoDataValue(nodata_val)
    gdal.RasterizeLayer(target_ds, [1], mem_lyr,
                        options=["ATTRIBUTE=tempFID", "ALL_TOUCHED=FALSE"])
    return (target_ds, lookup_dict)
def gen_df_polygon(hyObj, target_ds, lookup_dict, imgsrs2latlon, uid, use_glt_bool):
    """Generate a dataframe with location and UID of all points inside the polygons.

    Parameters
    ----------
    hyObj : HyTools file object
    target_ds : GDAL raster dataset
        One-band raster in which each polygon has a unique digital number.
    lookup_dict : dict
        Maps a polygon DN in the raster (as str) to the UID in the polygon
        attribute table.
    imgsrs2latlon : osr.CoordinateTransformation
        From the image's georeferenced coordinates to lat/lon.
    uid : str
        User-specified unique polygon ID field name from the shapefile.
    use_glt_bool : bool
        Whether the raster grid is the GLT (orthorectified) grid.

    Returns
    -------
    pandas.DataFrame or None
        One row per in-polygon pixel with new_uid, <uid>, GLT and raw image
        coordinates, and lon/lat; None when no pixel intersects the image.
    """
    poly_raster = target_ds.GetRasterBand(1).ReadAsArray()
    data_type = target_ds.GetRasterBand(1).DataType
    # Valid polygon pixels are > 0 and below the type's nodata value.
    if data_type == gdal.GDT_Byte:
        ind = np.where((poly_raster > 0) & (poly_raster < 255))
    elif data_type == gdal.GDT_Int16:
        ind = np.where((poly_raster > 0) & (poly_raster < 32767))
    else:
        ind = np.where(poly_raster > 0)
    total_point = len(ind[1])
    print(total_point, ' points')
    if total_point == 0:
        # polygons are not intersecting the image
        print("No intersection.")
        return None
    if use_glt_bool:
        ul_x, new_x_resolution, new_x_rot, ul_y, new_y_rot, new_y_resolution = hyObj.glt_transform
    else:
        ul_x, new_x_resolution, new_x_rot, ul_y, new_y_rot, new_y_resolution = hyObj.transform
    # Per-polygon running counter used to build each point's sub-ID.
    sub_id_dict = {key: 0 for key in lookup_dict}
    # Collect rows and build the DataFrame once at the end: the previous
    # per-point pd.concat of one-row frames was O(n^2) in total points.
    records = []
    for row, col in zip(ind[0], ind[1]):
        if use_glt_bool:
            # GLT values are one-based; convert to zero-based raw indices.
            row_post_glt = hyObj.glt_y[row, col] - 1
            col_post_glt = hyObj.glt_x[row, col] - 1
        else:
            row_post_glt = row
            col_post_glt = col
        poly_id = poly_raster[row, col]
        poly_id_code = lookup_dict[str(poly_id)]
        # Pixel-center georeferenced coordinates via the affine transform.
        x_coord = ul_x + (col + 0.5) * new_x_resolution + (row + 0.5) * new_x_rot
        y_coord = ul_y + (col + 0.5) * new_y_rot + (row + 0.5) * new_y_resolution
        lat, lon, _ = imgsrs2latlon.TransformPoint(x_coord, y_coord)
        sub_id = sub_id_dict[str(poly_id)]
        sub_id_dict[str(poly_id)] += 1
        records.append(['{}_{}'.format(poly_id_code, sub_id), poly_id_code,
                        col, row, col_post_glt, row_post_glt, lon, lat])
    return pd.DataFrame(records,
                        columns=['new_uid', uid, 'img_col_glt', 'img_row_glt',
                                 'img_col_raw', 'img_row_raw', 'lon', 'lat'])
def local_polygon2spec(hyObj, poly_shp, uid, use_band_list=False, band_list=[], use_glt_bool=False):
    """Extract spectra for every pixel inside the polygons of a shapefile.

    Steps:
    1. Rasterize the polygons on the image grid.
    2. Locate all in-polygon pixels in the raster.
    3. Extract the spectra of those pixels from the hyperspectral image.

    Parameters
    ----------
    hyObj : HyTools file object
    poly_shp : str
        Full filename of the polygon shapefile.
    uid : str
        User-specified unique polygon ID field from the attribute table.
    use_band_list : bool, optional
        Default False; whether to use a subset of bands.
    band_list : list or numpy array, optional
        Default empty list. As a list it holds band indices, e.g.
        [5, 6, 7, 8, 9, 12]; as a numpy array it must be the same size as
        hyObj.bad_bands, holding only True/False.
    use_glt_bool : bool, optional
        Default False; work on the GLT (orthorectified) grid.

    Returns
    -------
    pandas.DataFrame or None
        Location and spectra for every point within the polygons, or None
        when the uid field is missing or no polygon intersects the image.
    """
    if use_glt_bool:
        img_srs = osr.SpatialReference(wkt=hyObj.glt_projection)
    else:
        img_srs = osr.SpatialReference(wkt=hyObj.projection)
    latlon_wgs84 = osr.SpatialReference()
    latlon_wgs84.ImportFromEPSG(4326)
    # LAT/LON will be the only georeferenced coordinates kept in the result.
    imgsrs2latlon = osr.CoordinateTransformation(img_srs, latlon_wgs84)
    # Convert polygon geometry to a raster the size of the image; each
    # polygon gets a unique DN, mapped back to its UID via lookup_dict.
    target_ds, lookup_dict = rasterize_polygon(hyObj, poly_shp, uid, use_glt_bool)
    if target_ds is None:
        return None
    # DataFrame of location + UID for all points within the polygons.
    point_df = gen_df_polygon(hyObj, target_ds, lookup_dict, imgsrs2latlon, uid, use_glt_bool)
    if point_df is None:
        return None
    # Extract spectra at the raw (un-orthorectified) pixel locations.
    # int64 avoids the silent overflow the previous int16 cast caused on
    # images with more than 32767 rows or columns.
    spec_data = hyObj.get_pixels(point_df['img_row_raw'].values.astype(np.int64),
                                 point_df['img_col_raw'].values.astype(np.int64))
    # Spectra column names are derived from the wavelength units.
    if hyObj.wavelength_units.lower()[:4] == 'micr':
        new_band_name = ['B{:0.3f}'.format(x) for x in hyObj.wavelengths]
    elif hyObj.wavelength_units.lower()[:4] == 'nano':
        new_band_name = ['B{:04d}'.format(int(x)) for x in hyObj.wavelengths]
    else:
        new_band_name = ['B{:d}'.format(x + 1) for x in range(hyObj.bands)]
    # (The former 'ncav' special case built exactly the same frame as the
    # general case, so the duplicate branch was collapsed.)
    spec_df = pd.DataFrame(spec_data, columns=new_band_name)
    # Subset columns according to band_list / hyObj.bad_bands.
    spec_df = subset_band_list(hyObj, spec_df, use_band_list, band_list)
    # Merge location information and spectra information.
    point_df = pd.concat([point_df, spec_df], axis=1, join='inner')
    return point_df
def main():
    """Command-line entry point: extract point/polygon spectra from an image.

    Reads a hyperspectral image (ENVI/EMIT/netCDF), samples it at point
    locations from a CSV or inside polygons from a vector file, and writes
    the spectra (plus, when an OBS file is given, observation geometry)
    as CSV files into the output folder.
    """
    parser = argparse.ArgumentParser(
        # The previous description ("Export fractional cover image by
        # EndMember csv") was a copy-paste leftover from another script.
        description='Extract spectra from a hyperspectral image at point or polygon locations')
    parser.add_argument('-i', type=str, required=True, help='Input image file name')
    parser.add_argument('-pnt', type=str, required=True, help='CSV filename or shapefile')
    parser.add_argument('-od', type=str, required=True, help='Output folder')
    parser.add_argument('-uid', type=str, required=True, help='Unique ID in the vector file')
    parser.add_argument('-epsg', type=str, required=False, help='UTM EPSG code')
    parser.add_argument('-anc', type=str, required=False, help='Ancillary file / OBS file')
    parser.add_argument('-glt', type=str, default=None, required=False, help='External GLT ENVI file')
    parser.add_argument('-dt', type=str, default='envi', required=False,
                        help="Data type of the image (default 'envi') ['envi','emit','ncav']",
                        choices=['envi', 'emit', 'ncav'])
    parser.add_argument('-nnb', type=int, required=False, default=4,
                        help='How many neighbors in the image should be sampled from the center',
                        choices=[0, 4, 8])
    args = parser.parse_args()

    in_image_file = args.i
    out_path = args.od
    pnt_file = args.pnt
    uid = args.uid
    epsg_code = args.epsg
    n_neighbor_chose = args.nnb
    file_format = args.dt

    # External GLT band layout: band 1 = sample (x), band 0 = line (y).
    if args.glt is not None:
        glt_dict = {"glt_x": [args.glt, 1], "glt_y": [args.glt, 0]}
    else:
        glt_dict = {}

    # Standard 10-band OBS/ancillary layout, in band order.
    if args.anc:
        anc_names = ['path_length', 'sensor_az', 'sensor_zn', 'solar_az', 'solar_zn',
                     'phase', 'slope', 'aspect', 'cosine_i', 'utc_time']
        anc_dict = {name: [args.anc, band] for band, name in enumerate(anc_names)}
    else:
        anc_dict = None

    # (Removed a dead branch that re-assigned epsg_code to args.epsg for
    # CSV inputs — it was already set to exactly that value above.)
    hy_obj = ht.HyTools()
    hy_obj.read_file(in_image_file, file_format, glt_path=glt_dict, anc_path=anc_dict)

    # EMIT images always carry a GLT; otherwise use one only when supplied.
    lookup_glt_bool = file_format == 'emit' or bool(glt_dict)

    if has_gdal and pnt_file.endswith(('.shp', '.geojson', '.json')):
        out_df = local_polygon2spec(hy_obj, pnt_file, uid, use_band_list=False,
                                    band_list=[], use_glt_bool=lookup_glt_bool)
    else:
        if not pnt_file.endswith('.csv'):
            print("Point location file is not in CSV format")
            return
        pnt_df = pd.read_csv(pnt_file)
        if 'x_coord' in pnt_df.columns and 'y_coord' in pnt_df.columns:
            out_df = local_point2spec(hy_obj, pnt_file, uid, 'x_coord', 'y_coord', epsg_code,
                                      n_neighbor=n_neighbor_chose, use_band_list=False,
                                      band_list=[], use_glt_bool=lookup_glt_bool)
        elif 'lat' in pnt_df.columns and 'lon' in pnt_df.columns:
            print('latlon inside point file.')
            out_df = local_point2spec(hy_obj, pnt_file, uid, 'lon', 'lat', epsg_code,
                                      n_neighbor=n_neighbor_chose, use_band_list=False,
                                      band_list=[], use_glt_bool=lookup_glt_bool)
        else:
            print("Unknown coordinates column names")
            return
    if out_df is None:
        return

    img_base_name = os.path.basename(in_image_file).split('.')[0]
    out_df.insert(loc=1, column='flightline',
                  value=[img_base_name.split('_')[0]] * out_df.shape[0])
    # os.path.join is safe whether or not -od ends with a separator;
    # the previous raw string concatenation silently built a wrong path.
    out_df.to_csv(os.path.join(out_path, img_base_name + "_spec_df_asvc.csv"), index=False)

    if args.anc is not None:
        # int64 avoids overflow of the former int16 casts on large images.
        img_row = out_df['img_row_raw'].values.astype(np.int64)
        img_col = out_df['img_col_raw'].values.astype(np.int64)
        out_obs_df = obs_point2spec(hy_obj, img_row, img_col)
        out_obs_df.insert(loc=0, column='flightline',
                          value=[img_base_name.split('_')[0]] * out_df.shape[0])
        out_obs_df.insert(loc=0, column='new_uid', value=out_df['new_uid'])
        out_obs_df.to_csv(os.path.join(out_path, img_base_name + "_obs_df.csv"), index=False)


if __name__ == "__main__":
    main()