import argparse
import os
import sys

import numpy as np
import pandas as pd

try:
    from osgeo import gdal, osr, ogr
    has_gdal = True
except ModuleNotFoundError:
    print("No gdal installed")
    has_gdal = False

import hytools as ht
from hytools.misc.point import local_point2spec, subset_band_list

#warnings.filterwarnings("ignore")
#np.seterr(divide='ignore', invalid='ignore')


def obs_point2spec(hyObj, img_row, img_col):
    """Sample per-pixel observation geometry at the given raw-image locations.

    Parameters
    ----------
    hyObj: HyTools file object
        Must have the ancillary ('obs') layers attached.
    img_row, img_col: array-like of int
        Zero-based raw-image row and column indices.

    Returns
    -------
    obs_df: pandas dataframe
        One row per point with the sampled view/sun angles and terrain values.
    """
    b1 = hyObj.get_anc('sensor_az')[img_row, img_col]
    b2 = hyObj.get_anc('sensor_zn')[img_row, img_col]
    b3 = hyObj.get_anc('solar_az')[img_row, img_col]
    b4 = hyObj.get_anc('solar_zn')[img_row, img_col]
    b5 = hyObj.get_anc('slope')[img_row, img_col]
    b6 = hyObj.get_anc('aspect')[img_row, img_col]
    obs_data = np.vstack((img_row, img_col, b1, b2, b3, b4, b5, b6))
    # NOTE(review): the last column carries 'aspect' values but is named
    # 'azimuth'; kept as-is to preserve the output CSV schema -- confirm
    # with downstream consumers before renaming.
    obs_df = pd.DataFrame(obs_data.T,
                          columns=['img_row', 'img_col', 'sensor_az', 'sensor_zn',
                                   'solar_az', 'solar_zn', 'slope', 'azimuth'])
    return obs_df


def rasterize_polygon(hyObj, polygon_fn, key_id, use_glt_bool):
    '''Rasterize polygon based on image georeference and boundary.

    Each polygon is burned with a unique 1-based integer ID ('tempFID'); a
    lookup dictionary maps that ID (as str) back to the value of the
    user-specified attribute *key_id*.

    Parameters
    ----------
    hyObj: HyTools file object
    polygon_fn: str
        Filename of the polygon vector file (shapefile / GeoJSON).
    key_id: str
        Attribute field holding the unique polygon ID.
    use_glt_bool: bool
        If True, rasterize on the GLT (orthocorrected) grid, else the raw grid.

    Returns
    -------
    (target_ds, lookup_dict): (gdal in-memory dataset, dict)
        (None, None) when *key_id* is not a field of the vector file.
    '''
    source_ds = ogr.Open(polygon_fn)
    source_layer = source_ds.GetLayer()

    # Collect the attribute field names so key_id can be validated.
    field_list = []
    ldefn = source_layer.GetLayerDefn()
    for n in range(ldefn.GetFieldCount()):
        fdefn = ldefn.GetFieldDefn(n)
        field_list.append(fdefn.name)
    # Fix: removed stray debug `print(field_list[1])`, which raised IndexError
    # for vector files with fewer than two attribute fields.

    if key_id not in field_list:
        print('Field "', key_id, '" is not in the shapefile!')
        return (None, None)

    # Work on an in-memory copy so the source file is never modified.
    tmp_mem_driver = ogr.GetDriverByName('MEMORY')
    dest = tmp_mem_driver.CreateDataSource('tempData')
    mem_lyr = dest.CopyLayer(source_layer, 'newlayer', ['OVERWRITE=YES'])
    FeatureCount = mem_lyr.GetFeatureCount()

    # Add a sequential integer field used as the rasterization burn value.
    new_field = ogr.FieldDefn('tempFID', ogr.OFTInteger)
    mem_lyr.CreateField(new_field)
    lookup_dict = {}
    for i, feature in enumerate(mem_lyr):
        feature.SetField('tempFID', i + 1)  # key step 1: 1-based so 0 stays background
        lookup_dict[str(i + 1)] = feature.GetField(key_id)
        mem_lyr.SetFeature(feature)  # key step 2: persist the field value

    # Output grid follows either the GLT geometry or the raw image geometry.
    if use_glt_bool:
        out_col = hyObj.columns_glt
        out_row = hyObj.lines_glt
        out_transform = hyObj.glt_transform
        out_proj = hyObj.glt_projection
    else:
        out_col = hyObj.columns
        out_row = hyObj.lines
        out_transform = hyObj.transform
        out_proj = hyObj.projection

    # Pick the smallest integer type that holds every tempFID plus a nodata value.
    if FeatureCount < 255:
        target_ds = gdal.GetDriverByName('MEM').Create('', out_col, out_row, 1, gdal.GDT_Byte)
        nodata_val = 255
    elif FeatureCount < 32767:
        # Fix: FeatureCount == 255 previously fell through to Int32; Int16 suffices.
        target_ds = gdal.GetDriverByName('MEM').Create('', out_col, out_row, 1, gdal.GDT_Int16)
        nodata_val = -9999
    else:  # >= 32767
        target_ds = gdal.GetDriverByName('MEM').Create('', out_col, out_row, 1, gdal.GDT_Int32)
        nodata_val = -9999

    target_ds.SetGeoTransform(out_transform)
    target_ds.SetProjection(out_proj)
    band = target_ds.GetRasterBand(1)
    band.SetNoDataValue(nodata_val)
    gdal.RasterizeLayer(target_ds, [1], mem_lyr,
                        options=["ATTRIBUTE=tempFID", "ALL_TOUCHED=FALSE"])
    return (target_ds, lookup_dict)


def gen_df_polygon(hyObj, target_ds, lookup_dict, imgsrs2latlon, uid, use_glt_bool):
    '''Generate a dataframe that stores the location, UID of all the points within the polygons

    Parameters
    ----------
    hyObj: HyTools file object
    target_ds: GDAL raster dataset
        one band raster in which each polygon has unique digital number
    lookup_dict: dictionary
        a dictionary linking polygon DN in raster (target_ds) and the UID
        in the polygons attribute table
    imgsrs2latlon: coordinate transformation object
        transform from georeferenced coordinates of the image to LAT LON
    uid: str
        the user specified unique polygon ID name from the attribute table
        of the shapefile
    use_glt_bool: bool
        whether row/col indices refer to the GLT grid

    Returns
    -------
    return_df: pandas dataframe
        a dataframe that stores the location, UID of all the points within
        the polygons; None when no polygon intersects the image
    '''
    poly_raster = target_ds.GetRasterBand(1).ReadAsArray()
    data_type = target_ds.GetRasterBand(1).DataType

    # Valid polygon pixels are > 0 and below the nodata value for the type.
    if data_type == gdal.GDT_Byte:
        ind = np.where((poly_raster > 0) & (poly_raster < 255))
    elif data_type == gdal.GDT_Int16:
        ind = np.where((poly_raster > 0) & (poly_raster < 32767))
    else:
        ind = np.where(poly_raster > 0)

    total_point = len(ind[1])
    print(total_point, ' points')
    if total_point == 0:
        # polygons are not intersecting the image
        print("No intersection.")
        return None

    out_columns = ['new_uid', uid, 'img_col_glt', 'img_row_glt',
                   'img_col_raw', 'img_row_raw', 'lon', 'lat']

    if use_glt_bool:
        ul_x, new_x_resolution, new_x_rot, ul_y, new_y_rot, new_y_resolution = hyObj.glt_transform
    else:
        ul_x, new_x_resolution, new_x_rot, ul_y, new_y_rot, new_y_resolution = hyObj.transform

    # Per-polygon running counter used to build unique point IDs.
    sub_id_dict = {key: 0 for key in lookup_dict}

    # Fix: collect rows in a list and build the dataframe once, instead of
    # pd.concat inside the loop (accidental O(n^2) for many points).
    rows = []
    # add polygon ID, and point order number within the same polygon
    for index in range(total_point):
        row = ind[0][index]
        col = ind[1][index]
        if use_glt_bool:
            # GLT bands store 1-based raw-image line/sample; make zero-based.
            row_post_glt = hyObj.glt_y[row, col] - 1
            col_post_glt = hyObj.glt_x[row, col] - 1
        else:
            row_post_glt = row
            col_post_glt = col
        poly_id = poly_raster[row, col]
        poly_id_code = lookup_dict[str(poly_id)]
        # Pixel-center georeferenced coordinates on the rasterized grid.
        x_coord = ul_x + (col + 0.5) * new_x_resolution + (row + 0.5) * new_x_rot
        y_coord = ul_y + (col + 0.5) * new_y_rot + (row + 0.5) * new_y_resolution
        # NOTE(review): (lat, lon) unpack order assumes GDAL >= 3 axis ordering
        # for EPSG:4326 -- confirm against the GDAL version in use.
        lat, lon, _ = imgsrs2latlon.TransformPoint(x_coord, y_coord)
        #lon, lat, _ = imgsrs2latlon.TransformPoint(x_coord, y_coord)
        sub_id = sub_id_dict[str(poly_id)]
        sub_id_dict[str(poly_id)] += 1
        rows.append(['{}_{}'.format(poly_id_code, sub_id), poly_id_code,
                     col, row, col_post_glt, row_post_glt, lon, lat])

    return_df = pd.DataFrame(rows, columns=out_columns)
    return return_df


def local_polygon2spec(hyObj, poly_shp, uid, use_band_list=False, band_list=[], use_glt_bool=False):
    """Extract spectra with points within the boundary of polygons from the hyperspectral image

    Steps:
    1, Rasterize polygon based on image georeference
    2, Get locations of the points of interest from the raster
    3, Overlapping points and the hyperspectral image, and extract spectra

    Parameters
    ----------
    hyObj : HyTools file object
    poly_shp: str
        full filename of the polygon shapefile
    uid: str
        the user specified unique polygon ID name from the attribute table
        of the shapefile
    use_band_list: boolean
        default False; whether to use a subset of bands
    band_list: list or numpy array
        default is a blank list; if it is a list, it should be one like
        [5,6,7,8,9,12]; if it is a numpy array, it should be the same size
        as hyObj.bad_bands with only True or False in the array
    use_glt_bool: boolean
        default False

    Returns
    -------
    point_df: pandas dataframe
        it includes all the location and spectra information for all points
        within the polygons; None if the polygons do not intersect the image
    """
    if use_glt_bool:
        img_srs = osr.SpatialReference(wkt=hyObj.glt_projection)
    else:
        img_srs = osr.SpatialReference(wkt=hyObj.projection)

    # LAT LON will be the only georeferenced coordinates kept in the result
    latlon_wgs84 = osr.SpatialReference()
    latlon_wgs84.ImportFromEPSG(4326)
    imgsrs2latlon = osr.CoordinateTransformation(img_srs, latlon_wgs84)

    # convert polygon geometry into raster with the same size of the image,
    # and store UID in a lookup dictionary
    target_ds, lookup_dict = rasterize_polygon(hyObj, poly_shp, uid, use_glt_bool)
    if target_ds is None:
        return None

    # generate a dataframe that stores the location, UID of all the points
    # within the polygons
    point_df = gen_df_polygon(hyObj, target_ds, lookup_dict, imgsrs2latlon, uid, use_glt_bool)
    if point_df is None:
        return None

    # extract full spectra information from image based on points locations
    # Fix: int64 indices (the old int16 cast overflowed for images with more
    # than 32767 rows or columns).
    spec_data = hyObj.get_pixels(point_df['img_row_raw'].values.astype(np.int64),
                                 point_df['img_col_raw'].values.astype(np.int64))

    # determine the column names of the spectra dataframe based on wavelengths
    if hyObj.wavelength_units.lower()[:4] == 'micr':
        new_band_name = ['B{:0.3f}'.format(x) for x in hyObj.wavelengths]
    elif hyObj.wavelength_units.lower()[:4] == 'nano':
        new_band_name = ['B{:04d}'.format(int(x)) for x in hyObj.wavelengths]
    else:
        new_band_name = ['B{:d}'.format(x + 1) for x in range(hyObj.bands)]

    # Fix: both branches of the original `file_type in ['ncav']` special case
    # built the identical dataframe; collapsed into one statement.
    spec_df = pd.DataFrame(spec_data, columns=new_band_name)

    # perform the subsetting of the columns in the dataframe according to the
    # band_list or hyObj.bad_bands
    spec_df = subset_band_list(hyObj, spec_df, use_band_list, band_list)

    # merge location information and spectra information
    point_df = pd.concat([point_df, spec_df], axis=1, join='inner')
    return point_df


def main():
    """CLI entry point: extract spectra (and optionally observation geometry)
    at point/polygon locations from a hyperspectral image and write CSVs."""
    parser = argparse.ArgumentParser(description='Export fractional cover image by EndMember csv')
    parser.add_argument('-i', type=str, required=True, help='Input image file name')
    parser.add_argument('-pnt', type=str, required=True, help='CSV filename or shapefile')
    parser.add_argument('-od', type=str, required=True, help='Output folder')
    parser.add_argument('-uid', type=str, required=True, help='Unique ID in the vector file')
    parser.add_argument('-epsg', type=str, required=False, help='UTM EPSG code')
    parser.add_argument('-anc', type=str, required=False, help='Ancillary file / OBS file')
    parser.add_argument('-glt', type=str, default=None, required=False, help='External GLT ENVI file')
    parser.add_argument('-dt', type=str, default='envi', required=False,
                        help="Data type of the image (default 'envi') ['envi','emit','ncav']",
                        choices=['envi', 'emit', 'ncav'])
    parser.add_argument('-nnb', type=int, required=False, default=4,
                        help='How many neighbors in the image should be sampled from the center',
                        choices=[0, 4, 8])
    args = parser.parse_args()

    in_image_file = args.i
    out_path = args.od
    pnt_file = args.pnt
    uid = args.uid
    epsg_code = args.epsg
    n_neighbor_chose = args.nnb
    file_format = args.dt

    # GLT band order: band index 1 = sample (x), band index 0 = line (y).
    if args.glt is not None:
        glt_dict = {
            "glt_x": [args.glt, 1],
            "glt_y": [args.glt, 0],
        }
    else:
        glt_dict = {}

    # Standard 10-band OBS/ancillary layout.
    if args.anc:
        anc_dict = {
            "path_length": [args.anc, 0],
            "sensor_az": [args.anc, 1],
            "sensor_zn": [args.anc, 2],
            "solar_az": [args.anc, 3],
            "solar_zn": [args.anc, 4],
            "phase": [args.anc, 5],
            "slope": [args.anc, 6],
            "aspect": [args.anc, 7],
            "cosine_i": [args.anc, 8],
            "utc_time": [args.anc, 9],
        }
    else:
        anc_dict = None
    # NOTE: the original re-assigned epsg_code = args.epsg for CSV inputs,
    # which was a no-op and has been removed.

    hy_obj = ht.HyTools()
    hy_obj.read_file(in_image_file, file_format, glt_path=glt_dict, anc_path=anc_dict)

    # EMIT images always carry a GLT; otherwise use one only if supplied.
    lookup_glt_bool = (file_format == 'emit') or bool(glt_dict)

    if has_gdal and (pnt_file.endswith('.shp') or pnt_file.endswith('.geojson')
                     or pnt_file.endswith('.json')):
        out_df = local_polygon2spec(hy_obj, pnt_file, uid,
                                    use_band_list=False, band_list=[],
                                    use_glt_bool=lookup_glt_bool)
    else:
        if not pnt_file.endswith('.csv'):
            print("Point location file is not in CSV format")
            return
        pnt_df = pd.read_csv(pnt_file)
        if 'x_coord' in pnt_df.columns and 'y_coord' in pnt_df.columns:
            out_df = local_point2spec(hy_obj, pnt_file, uid, 'x_coord', 'y_coord', epsg_code,
                                      n_neighbor=n_neighbor_chose,
                                      use_band_list=False, band_list=[],
                                      use_glt_bool=lookup_glt_bool)
        elif 'lat' in pnt_df.columns and 'lon' in pnt_df.columns:
            print('latlon inside point file.')
            out_df = local_point2spec(hy_obj, pnt_file, uid, 'lon', 'lat', epsg_code,
                                      n_neighbor=n_neighbor_chose,
                                      use_band_list=False, band_list=[],
                                      use_glt_bool=lookup_glt_bool)
        else:
            print("Unknown coordinates column names")
            return

    if out_df is None:
        return

    img_base_name = os.path.basename(in_image_file).split('.')[0]
    out_df.insert(loc=1, column='flightline',
                  value=[img_base_name.split('_')[0]] * out_df.shape[0])
    # Fix: join the output directory properly; raw `out_path + name`
    # silently wrote next to the folder when it lacked a trailing separator.
    out_df.to_csv(os.path.join(out_path, img_base_name + "_spec_df_asvc.csv"), index=False)

    if args.anc is not None:
        # Fix: int64 indices (int16 overflowed for images > 32767 rows/cols).
        img_row = out_df['img_row_raw'].values.astype(np.int64)
        img_col = out_df['img_col_raw'].values.astype(np.int64)
        out_obs_df = obs_point2spec(hy_obj, img_row, img_col)
        out_obs_df.insert(loc=0, column='flightline',
                          value=[img_base_name.split('_')[0]] * out_df.shape[0])
        out_obs_df.insert(loc=0, column='new_uid', value=out_df['new_uid'])
        out_obs_df.to_csv(os.path.join(out_path, img_base_name + "_obs_df.csv"), index=False)


if __name__ == "__main__":
    main()