# Source: BRDF/Flexbrdf/scripts/imagepoints_ext_glt_ht.py
# (listing exported 2026-04-10 16:46:45 +08:00 — 414 lines, 14 KiB, Python)
import os, sys
import numpy as np
import argparse
import pandas as pd
try:
from osgeo import gdal, osr, ogr
has_gdal=True
except ModuleNotFoundError:
print("No gdal installed")
has_gdal=False
import hytools as ht
from hytools.misc.point import local_point2spec, subset_band_list #*
#warnings.filterwarnings("ignore")
#np.seterr(divide='ignore', invalid='ignore')
def obs_point2spec(hyObj, img_row, img_col):
    """Build a per-point observation-geometry table.

    Samples six ancillary (OBS) layers at the given image coordinates and
    returns them alongside the coordinates themselves.

    Parameters
    ----------
    hyObj : HyTools file object
        Must provide ``get_anc(name)`` returning a 2-D array per layer.
    img_row, img_col : array-like of int
        Zero-based raw image row/column indices of the sample points.

    Returns
    -------
    pandas.DataFrame
        One row per point with columns img_row, img_col, sensor_az,
        sensor_zn, solar_az, solar_zn, slope, azimuth.
        NOTE(review): the 'azimuth' column is filled from the 'aspect'
        ancillary layer — confirm downstream consumers expect that name.
    """
    anc_layers = ('sensor_az', 'sensor_zn', 'solar_az', 'solar_zn', 'slope', 'aspect')
    sampled = [hyObj.get_anc(layer)[img_row, img_col] for layer in anc_layers]
    stacked = np.vstack([img_row, img_col] + sampled)
    return pd.DataFrame(
        stacked.T,
        columns=['img_row', 'img_col', 'sensor_az', 'sensor_zn',
                 'solar_az', 'solar_zn', 'slope', 'azimuth'])
def rasterize_polygon(hyObj, polygon_fn, key_id, use_glt_bool):
    """Rasterize a polygon vector file onto the image grid.

    Each feature is burned with a unique sequential integer ('tempFID',
    starting at 1); a lookup dictionary maps that integer (as a string)
    back to the feature's value in the user-chosen ID field.

    Parameters
    ----------
    hyObj : HyTools file object
        Supplies the output grid size, geotransform and projection
        (GLT versions when use_glt_bool is True).
    polygon_fn : str
        Vector file (shapefile/GeoJSON) readable by OGR.
    key_id : str
        Attribute-field name holding each polygon's unique ID.
    use_glt_bool : bool
        Rasterize on the GLT (orthorectified) grid instead of the raw grid.

    Returns
    -------
    tuple
        (gdal.Dataset, dict) on success, or (None, None) when key_id is
        not a field of the vector file.
    """
    source_ds = ogr.Open(polygon_fn)
    source_layer = source_ds.GetLayer()
    ldefn = source_layer.GetLayerDefn()
    field_list = [ldefn.GetFieldDefn(n).name for n in range(ldefn.GetFieldCount())]
    # (Removed debug `print(field_list[1])`, which raised IndexError on
    # vector files with a single attribute field.)
    if key_id not in field_list:
        print('Field "', key_id, '" is not in the shapefile!')
        return (None, None)
    # Work on an in-memory copy so the source file is never modified.
    tmp_mem_driver = ogr.GetDriverByName('MEMORY')
    dest = tmp_mem_driver.CreateDataSource('tempData')
    mem_lyr = dest.CopyLayer(source_layer, 'newlayer', ['OVERWRITE=YES'])
    feature_count = mem_lyr.GetFeatureCount()
    # Assign each feature a sequential integer to burn into the raster.
    mem_lyr.CreateField(ogr.FieldDefn('tempFID', ogr.OFTInteger))
    lookup_dict = {}
    for i, feature in enumerate(mem_lyr):
        feature.SetField('tempFID', i + 1)  # key step 1
        lookup_dict[str(i + 1)] = feature.GetField(key_id)
        mem_lyr.SetFeature(feature)  # key step 2: persist the field value
    if use_glt_bool:
        out_col, out_row = hyObj.columns_glt, hyObj.lines_glt
        out_transform, out_proj = hyObj.glt_transform, hyObj.glt_projection
    else:
        out_col, out_row = hyObj.columns, hyObj.lines
        out_transform, out_proj = hyObj.transform, hyObj.projection
    # Pick the smallest raster type that holds every tempFID while leaving
    # room for a distinct nodata value (255 is reserved in the Byte case).
    if feature_count < 255:
        gdal_dtype, nodata_val = gdal.GDT_Byte, 255
    elif feature_count < 32767:
        # Also covers feature_count == 255, which the previous chain
        # accidentally pushed all the way to Int32.
        gdal_dtype, nodata_val = gdal.GDT_Int16, -9999
    else:
        gdal_dtype, nodata_val = gdal.GDT_Int32, -9999
    target_ds = gdal.GetDriverByName('MEM').Create('', out_col, out_row, 1, gdal_dtype)
    target_ds.SetGeoTransform(out_transform)
    target_ds.SetProjection(out_proj)
    band = target_ds.GetRasterBand(1)
    band.SetNoDataValue(nodata_val)
    gdal.RasterizeLayer(target_ds, [1], mem_lyr,
                        options=["ATTRIBUTE=tempFID", "ALL_TOUCHED=FALSE"])
    return (target_ds, lookup_dict)
def gen_df_polygon(hyObj, target_ds, lookup_dict, imgsrs2latlon, uid, use_glt_bool):
    """Generate a dataframe with location and UID of all points inside the polygons.

    Parameters
    ----------
    hyObj : HyTools file object
    target_ds : GDAL raster dataset
        One-band raster in which each polygon has a unique digital number.
    lookup_dict : dict
        Maps a polygon DN in the raster (as str) to the UID in the polygon
        attribute table.
    imgsrs2latlon : osr.CoordinateTransformation
        From the image's georeferenced coordinates to lat/lon.
    uid : str
        User-specified unique polygon ID field name from the shapefile.
    use_glt_bool : bool
        Whether the raster grid is the GLT (orthorectified) grid.

    Returns
    -------
    pandas.DataFrame or None
        One row per in-polygon pixel with new_uid, <uid>, GLT and raw image
        coordinates, and lon/lat; None when no pixel intersects the image.
    """
    poly_raster = target_ds.GetRasterBand(1).ReadAsArray()
    data_type = target_ds.GetRasterBand(1).DataType
    # Valid polygon pixels are > 0 and below the type's nodata value.
    if data_type == gdal.GDT_Byte:
        ind = np.where((poly_raster > 0) & (poly_raster < 255))
    elif data_type == gdal.GDT_Int16:
        ind = np.where((poly_raster > 0) & (poly_raster < 32767))
    else:
        ind = np.where(poly_raster > 0)
    total_point = len(ind[1])
    print(total_point, ' points')
    if total_point == 0:
        # polygons are not intersecting the image
        print("No intersection.")
        return None
    if use_glt_bool:
        ul_x, new_x_resolution, new_x_rot, ul_y, new_y_rot, new_y_resolution = hyObj.glt_transform
    else:
        ul_x, new_x_resolution, new_x_rot, ul_y, new_y_rot, new_y_resolution = hyObj.transform
    # Per-polygon running counter used to build each point's sub-ID.
    sub_id_dict = {key: 0 for key in lookup_dict}
    # Collect rows and build the DataFrame once at the end: the previous
    # per-point pd.concat of one-row frames was O(n^2) in total points.
    records = []
    for row, col in zip(ind[0], ind[1]):
        if use_glt_bool:
            # GLT values are one-based; convert to zero-based raw indices.
            row_post_glt = hyObj.glt_y[row, col] - 1
            col_post_glt = hyObj.glt_x[row, col] - 1
        else:
            row_post_glt = row
            col_post_glt = col
        poly_id = poly_raster[row, col]
        poly_id_code = lookup_dict[str(poly_id)]
        # Pixel-center georeferenced coordinates via the affine transform.
        x_coord = ul_x + (col + 0.5) * new_x_resolution + (row + 0.5) * new_x_rot
        y_coord = ul_y + (col + 0.5) * new_y_rot + (row + 0.5) * new_y_resolution
        lat, lon, _ = imgsrs2latlon.TransformPoint(x_coord, y_coord)
        sub_id = sub_id_dict[str(poly_id)]
        sub_id_dict[str(poly_id)] += 1
        records.append(['{}_{}'.format(poly_id_code, sub_id), poly_id_code,
                        col, row, col_post_glt, row_post_glt, lon, lat])
    return pd.DataFrame(records,
                        columns=['new_uid', uid, 'img_col_glt', 'img_row_glt',
                                 'img_col_raw', 'img_row_raw', 'lon', 'lat'])
def local_polygon2spec(hyObj, poly_shp, uid, use_band_list=False, band_list=[], use_glt_bool=False):
    """Extract spectra for every pixel inside the polygons of a shapefile.

    Steps:
    1. Rasterize the polygons on the image grid.
    2. Locate all in-polygon pixels in the raster.
    3. Extract the spectra of those pixels from the hyperspectral image.

    Parameters
    ----------
    hyObj : HyTools file object
    poly_shp : str
        Full filename of the polygon shapefile.
    uid : str
        User-specified unique polygon ID field from the attribute table.
    use_band_list : bool, optional
        Default False; whether to use a subset of bands.
    band_list : list or numpy array, optional
        Default empty list. As a list it holds band indices, e.g.
        [5, 6, 7, 8, 9, 12]; as a numpy array it must be the same size as
        hyObj.bad_bands, holding only True/False.
    use_glt_bool : bool, optional
        Default False; work on the GLT (orthorectified) grid.

    Returns
    -------
    pandas.DataFrame or None
        Location and spectra for every point within the polygons, or None
        when the uid field is missing or no polygon intersects the image.
    """
    if use_glt_bool:
        img_srs = osr.SpatialReference(wkt=hyObj.glt_projection)
    else:
        img_srs = osr.SpatialReference(wkt=hyObj.projection)
    latlon_wgs84 = osr.SpatialReference()
    latlon_wgs84.ImportFromEPSG(4326)
    # LAT/LON will be the only georeferenced coordinates kept in the result.
    imgsrs2latlon = osr.CoordinateTransformation(img_srs, latlon_wgs84)
    # Convert polygon geometry to a raster the size of the image; each
    # polygon gets a unique DN, mapped back to its UID via lookup_dict.
    target_ds, lookup_dict = rasterize_polygon(hyObj, poly_shp, uid, use_glt_bool)
    if target_ds is None:
        return None
    # DataFrame of location + UID for all points within the polygons.
    point_df = gen_df_polygon(hyObj, target_ds, lookup_dict, imgsrs2latlon, uid, use_glt_bool)
    if point_df is None:
        return None
    # Extract spectra at the raw (un-orthorectified) pixel locations.
    # int64 avoids the silent overflow the previous int16 cast caused on
    # images with more than 32767 rows or columns.
    spec_data = hyObj.get_pixels(point_df['img_row_raw'].values.astype(np.int64),
                                 point_df['img_col_raw'].values.astype(np.int64))
    # Spectra column names are derived from the wavelength units.
    if hyObj.wavelength_units.lower()[:4] == 'micr':
        new_band_name = ['B{:0.3f}'.format(x) for x in hyObj.wavelengths]
    elif hyObj.wavelength_units.lower()[:4] == 'nano':
        new_band_name = ['B{:04d}'.format(int(x)) for x in hyObj.wavelengths]
    else:
        new_band_name = ['B{:d}'.format(x + 1) for x in range(hyObj.bands)]
    # (The former 'ncav' special case built exactly the same frame as the
    # general case, so the duplicate branch was collapsed.)
    spec_df = pd.DataFrame(spec_data, columns=new_band_name)
    # Subset columns according to band_list / hyObj.bad_bands.
    spec_df = subset_band_list(hyObj, spec_df, use_band_list, band_list)
    # Merge location information and spectra information.
    point_df = pd.concat([point_df, spec_df], axis=1, join='inner')
    return point_df
def main():
    """Command-line entry point: extract point/polygon spectra from an image.

    Reads a hyperspectral image (ENVI/EMIT/netCDF), samples it at point
    locations from a CSV or inside polygons from a vector file, and writes
    the spectra (plus, when an OBS file is given, observation geometry)
    as CSV files into the output folder.
    """
    parser = argparse.ArgumentParser(
        # The previous description ("Export fractional cover image by
        # EndMember csv") was a copy-paste leftover from another script.
        description='Extract spectra from a hyperspectral image at point or polygon locations')
    parser.add_argument('-i', type=str, required=True, help='Input image file name')
    parser.add_argument('-pnt', type=str, required=True, help='CSV filename or shapefile')
    parser.add_argument('-od', type=str, required=True, help='Output folder')
    parser.add_argument('-uid', type=str, required=True, help='Unique ID in the vector file')
    parser.add_argument('-epsg', type=str, required=False, help='UTM EPSG code')
    parser.add_argument('-anc', type=str, required=False, help='Ancillary file / OBS file')
    parser.add_argument('-glt', type=str, default=None, required=False, help='External GLT ENVI file')
    parser.add_argument('-dt', type=str, default='envi', required=False,
                        help="Data type of the image (default 'envi') ['envi','emit','ncav']",
                        choices=['envi', 'emit', 'ncav'])
    parser.add_argument('-nnb', type=int, required=False, default=4,
                        help='How many neighbors in the image should be sampled from the center',
                        choices=[0, 4, 8])
    args = parser.parse_args()

    in_image_file = args.i
    out_path = args.od
    pnt_file = args.pnt
    uid = args.uid
    epsg_code = args.epsg
    n_neighbor_chose = args.nnb
    file_format = args.dt

    # External GLT band layout: band 1 = sample (x), band 0 = line (y).
    if args.glt is not None:
        glt_dict = {"glt_x": [args.glt, 1], "glt_y": [args.glt, 0]}
    else:
        glt_dict = {}

    # Standard 10-band OBS/ancillary layout, in band order.
    if args.anc:
        anc_names = ['path_length', 'sensor_az', 'sensor_zn', 'solar_az', 'solar_zn',
                     'phase', 'slope', 'aspect', 'cosine_i', 'utc_time']
        anc_dict = {name: [args.anc, band] for band, name in enumerate(anc_names)}
    else:
        anc_dict = None

    # (Removed a dead branch that re-assigned epsg_code to args.epsg for
    # CSV inputs — it was already set to exactly that value above.)
    hy_obj = ht.HyTools()
    hy_obj.read_file(in_image_file, file_format, glt_path=glt_dict, anc_path=anc_dict)

    # EMIT images always carry a GLT; otherwise use one only when supplied.
    lookup_glt_bool = file_format == 'emit' or bool(glt_dict)

    if has_gdal and pnt_file.endswith(('.shp', '.geojson', '.json')):
        out_df = local_polygon2spec(hy_obj, pnt_file, uid, use_band_list=False,
                                    band_list=[], use_glt_bool=lookup_glt_bool)
    else:
        if not pnt_file.endswith('.csv'):
            print("Point location file is not in CSV format")
            return
        pnt_df = pd.read_csv(pnt_file)
        if 'x_coord' in pnt_df.columns and 'y_coord' in pnt_df.columns:
            out_df = local_point2spec(hy_obj, pnt_file, uid, 'x_coord', 'y_coord', epsg_code,
                                      n_neighbor=n_neighbor_chose, use_band_list=False,
                                      band_list=[], use_glt_bool=lookup_glt_bool)
        elif 'lat' in pnt_df.columns and 'lon' in pnt_df.columns:
            print('latlon inside point file.')
            out_df = local_point2spec(hy_obj, pnt_file, uid, 'lon', 'lat', epsg_code,
                                      n_neighbor=n_neighbor_chose, use_band_list=False,
                                      band_list=[], use_glt_bool=lookup_glt_bool)
        else:
            print("Unknown coordinates column names")
            return
    if out_df is None:
        return

    img_base_name = os.path.basename(in_image_file).split('.')[0]
    out_df.insert(loc=1, column='flightline',
                  value=[img_base_name.split('_')[0]] * out_df.shape[0])
    # os.path.join is safe whether or not -od ends with a separator;
    # the previous raw string concatenation silently built a wrong path.
    out_df.to_csv(os.path.join(out_path, img_base_name + "_spec_df_asvc.csv"), index=False)

    if args.anc is not None:
        # int64 avoids overflow of the former int16 casts on large images.
        img_row = out_df['img_row_raw'].values.astype(np.int64)
        img_col = out_df['img_col_raw'].values.astype(np.int64)
        out_obs_df = obs_point2spec(hy_obj, img_row, img_col)
        out_obs_df.insert(loc=0, column='flightline',
                          value=[img_base_name.split('_')[0]] * out_df.shape[0])
        out_obs_df.insert(loc=0, column='new_uid', value=out_df['new_uid'])
        out_obs_df.to_csv(os.path.join(out_path, img_base_name + "_obs_df.csv"), index=False)


if __name__ == "__main__":
    main()