V4.0.13.1 fim4 alpha by catchments (#743)

NOAA-OWP · Dec 9, 2022 · 8a043d6 · 8a043d6
1 parent b8a209f
commit 8a043d6
Show file tree

Hide file tree

Showing 5 changed files with 735 additions and 0 deletions.
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -1,6 +1,19 @@
 All notable changes to this project will be documented in this file.
 We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
 
+## v4.0.13.1 - 2022-12-09 - [PR #743](https://github.com/NOAA-OWP/inundation-mapping/pull/743)
+
+This merge adds the tools required to generate Alpha metrics by hydroid. It summarizes the Apha metrics by branch 0 catchment for use in the Hydrovis "FIM Performance" service.
+
+## Additions
+
+- `pixel_counter.py`:  A script to perform zonal statistics against raster data and geometries
+- `pixel_counter_functions.py`: Supporting functions
+- `pixel_counter_wrapper.py`: a script that wraps `pixel_counter.py` for batch processing
+- `test_case_by_hydroid.py`: the main script to orchestrate the generation of alpha metrics by catchment
+
+<br/><br/>
+
 ## v4.0.13.0 - 2022-11-16 - [PR #744](https://github.com/NOAA-OWP/inundation-mapping/pull/744)
 
 Changes branch 0 headwaters data source from NHD to NWS to be consistent with branches. Removes references to NHD flowlines and headwater data.

diff --git a/tools/pixel_counter.py b/tools/pixel_counter.py
@@ -0,0 +1,291 @@
+'''Created on 02/21/2022.
+Written by:
+Anuska Narayanan (The University of Alabama Department of Geography, [email protected];
+Sophie Williams (The University of Alabama Department of Geography, [email protected]; and
+Brad Bates (NOAA, Lynker, and the National Water Center, [email protected])
+
+Derived from a Python version of a zonal statistics function written by Matthew Perry (@perrygeo).
+
+Description: This script isolates the number of pixels per class of a raster within the outlines of
+one or more polygons and displays them in a table. It accomplishes this by rasterizing the vector file,
+masking out the desired areas of both rasters, and then summarizing them in a dataframe. It makes use
+of the gdal, numpy, and pandas function libraries.
+Inputs: one raster file with at least one set of attributes; one vector file containing one or more polygon
+boundaries
+Output: a dataframe table with rows displayed by each polygon within the vector file, and columns
+displaying the pixel count of each raster attribute class in the polygon
+'''
+
+# Import raster and vector function libraries
+#from types import NoneType
+from osgeo import gdal, ogr
+from osgeo.gdalconst import *
+# Import numerical data library
+import numpy as np
+# Import file management library
+import sys
+import os
+# Import data analysis library
+import pandas as pd
+import argparse
+from pandas import DataFrame
+import copy
+import pathlib
+import tempfile
+
+from pixel_counter_functions import (get_nlcd_counts, get_levee_counts, get_bridge_counts, get_nlcd_counts_inside_flood,get_mask_value_counts)
+
+
+# Set up error handler
+gdal.PushErrorHandler('CPLQuietErrorHandler')
+
+# Function to pologonize flood extent
+def make_flood_extent_polygon(flood_extent):
+    flood_extent_dataset = gdal.Open(flood_extent)
+    cols = flood_extent_dataset.RasterXSize
+    rows = flood_extent_dataset.RasterYSize
+
+    # Get some metadata to filter out NaN values
+    flood_extent_raster = flood_extent_dataset.GetRasterBand(1)
+    noDataVal = flood_extent_raster.GetNoDataValue()  # no data value
+    scaleFactor = flood_extent_raster.GetScale()  # scale factor
+
+    # Assign flood_extent No Data Values to NaN
+    flood_extent_array = flood_extent_dataset.GetRasterBand(1).ReadAsArray(0, 0, cols, rows).astype(np.float)
+    flood_extent_array[flood_extent_array == int(noDataVal)] = np.nan
+    flood_extent_array = flood_extent_array / scaleFactor
+
+    # Assign flood_extent Negative Values to NaN
+    flood_extent_nonzero_array = copy.copy(flood_extent_array)
+    flood_extent_nonzero_array[flood_extent_array < 0] = np.nan
+
+    # make temporary output file
+    mem_drv = gdal.GetDriverByName('MEM')
+    target = mem_drv.Create('temp_tif', cols, rows, 1, gdal.GDT_Float32)
+    target.GetRasterBand(1).WriteArray(flood_extent_nonzero_array)
+
+    # Add GeoTranform and Projection
+    geotrans = flood_extent_dataset.GetGeoTransform()
+    proj = flood_extent_dataset.GetProjection()
+    target.SetGeoTransform(geotrans)
+    target.SetProjection(proj)
+    target.FlushCache()
+
+    # set up inputs for converting flood extent raster to polygon
+    band = target.GetRasterBand(1)
+    band.ReadAsArray()
+
+    outshape_location = tempfile.gettempdir()
+    outshape_location_path = os.path.abspath(outshape_location)
+    outShapefile = outshape_location_path + "/" + "polygonized.shp"
+    driver = ogr.GetDriverByName("ESRI Shapefile")
+    outDatasource = driver.CreateDataSource(outShapefile)
+    outLayer = outDatasource.CreateLayer("buffalo", srs=None)
+
+    # Add the DN field
+    newField = ogr.FieldDefn('HydroID', ogr.OFTInteger)
+    outLayer.CreateField(newField)
+
+    # Polygonize
+    gdal.Polygonize(band, None, outLayer, 0, [], callback=None)
+    outDatasource.Destroy()
+    sourceRaster = None
+
+    fullpath = os.path.abspath(outShapefile)
+    print(fullpath)
+    print(type(fullpath))
+
+    return fullpath
+# Function that transforms vector dataset to raster
+def bbox_to_pixel_offsets(gt, bbox):
+    originX = gt[0]
+    originY = gt[3]
+    pixel_width = gt[1]
+    pixel_height = gt[5]
+    x1 = int((bbox[0] - originX) / pixel_width)
+    x2 = int((bbox[1] - originX) / pixel_width) + 1
+
+    y1 = int((bbox[3] - originY) / pixel_height)
+    y2 = int((bbox[2] - originY) / pixel_height) + 1
+
+    xsize = x2 - x1
+    ysize = y2 - y1
+    return (x1, y1, xsize, ysize)
+
+
+# Main function that determines zonal statistics of raster classes in a polygon area
+def zonal_stats(vector_path, raster_path_dict, nodata_value=None, global_src_extent=False):
+
+    # Loop through different raster paths in the raster_path_dict and
+    # perform zonal statistics on the files.
+    for layer in raster_path_dict:
+        raster_path = raster_path_dict[layer]
+        if raster_path == "":  # Only process if a raster path is provided
+            continue
+        if layer == 'flood_extent' and raster_path_dict["nlcd"]!= "":
+            vector_path = make_flood_extent_polygon(flood_extent)
+            raster_path = raster_path_dict["nlcd"]
+
+
+        # Opens raster file and sets path
+        rds = gdal.Open(raster_path, GA_ReadOnly)
+
+        assert rds
+        rb = rds.GetRasterBand(1)
+        rgt = rds.GetGeoTransform()
+
+        if nodata_value:
+            nodata_value = float(nodata_value)
+            rb.SetNoDataValue(nodata_value)
+        if vector_path == "":
+            print('No vector path provided. Continuing to next layer.')
+            continue
+        # Opens vector file and sets path
+
+        try:
+            vds = ogr.Open(vector_path)
+            vlyr = vds.GetLayer(0)
+        except:
+            continue
+
+        # Creates an in-memory numpy array of the source raster data covering the whole extent of the vector layer
+        if global_src_extent:
+            # use global source extent
+            # useful only when disk IO or raster scanning inefficiencies are your limiting factor
+            # advantage: reads raster data in one pass
+            # disadvantage: large vector extents may have big memory requirements
+            src_offset = bbox_to_pixel_offsets(rgt, vlyr.GetExtent())
+            src_array = rb.ReadAsArray(*src_offset)
+
+            # calculate new geotransform of the layer subset
+            new_gt = (
+                (rgt[0] + (src_offset[0] * rgt[1])),
+                rgt[1],
+                0.0,
+                (rgt[3] + (src_offset[1] * rgt[5])),
+                0.0,
+                rgt[5]
+            )
+
+        mem_drv = ogr.GetDriverByName('Memory')
+        driver = gdal.GetDriverByName('MEM')
+
+        # Loop through vectors, as many as exist in file
+        # Creates new list to contain their stats
+        stats = []
+        feat = vlyr.GetNextFeature()
+        while feat is not None:
+
+            if not global_src_extent:
+                # use local source extent
+                # fastest option when you have fast disks and well indexed raster (ie tiled Geotiff)
+                # advantage: each feature uses the smallest raster chunk
+                # disadvantage: lots of reads on the source raster
+                src_offset = bbox_to_pixel_offsets(rgt, feat.geometry().GetEnvelope())
+                src_array = rb.ReadAsArray(*src_offset)
+
+                # calculate new geotransform of the feature subset
+                new_gt = (
+                    (rgt[0] + (src_offset[0] * rgt[1])),
+                    rgt[1],
+                    0.0,
+                    (rgt[3] + (src_offset[1] * rgt[5])),
+                    0.0,
+                    rgt[5]
+                )
+
+            # Create a temporary vector layer in memory
+            mem_ds = mem_drv.CreateDataSource('out')
+            mem_layer = mem_ds.CreateLayer('poly', None, ogr.wkbPolygon)
+            mem_layer.CreateFeature(feat.Clone())
+
+            # Rasterize temporary vector layer
+            rvds = driver.Create('', src_offset[2], src_offset[3], 1, gdal.GDT_Byte)
+            rvds.SetGeoTransform(new_gt)
+            gdal.RasterizeLayer(rvds, [1], mem_layer, burn_values=[1])
+            rv_array = rvds.ReadAsArray()
+
+            # Mask the source data array with our current feature and get statistics (pixel count) of masked areas
+            # we take the logical_not to flip 0<->1 to get the correct mask effect
+            # we also mask out nodata values explictly
+            if src_array is None:
+                feat = vlyr.GetNextFeature()
+                continue
+            masked = np.ma.MaskedArray(
+                src_array,
+                mask=np.logical_or(
+                    src_array == nodata_value,
+                    np.logical_not(rv_array)
+                )
+            )
+
+            # Call different counter functions depending on the raster's source.
+            if layer == "nlcd":
+                feature_stats = get_nlcd_counts(feat, masked)
+            if layer == "agreement_raster":
+                feature_stats = get_mask_value_counts(feat, masked)
+            if layer == "levees":
+                feature_stats = get_levee_counts(feat, masked)
+            if layer == "bridges":
+                feature_stats = get_bridge_counts(feat, masked)
+            if layer == "flood_extent":
+                feature_stats = get_nlcd_counts_inside_flood(feat, masked)
+
+            stats.append(feature_stats)
+
+            rvds = None
+            mem_ds = None
+            feat = vlyr.GetNextFeature()
+
+    vds = None
+    rds = None
+    return stats
+
+
+# Creates and prints dataframe containing desired statistics
+if __name__ == "__main__":
+    # opts = {'VECTOR': sys.argv[1:], 'RASTER': sys.argv[2:]}
+    # stats = zonal_stats(opts['VECTOR'], opts['RASTER'])
+
+    parser = argparse.ArgumentParser(description='Computes pixel counts for raster classes within a vector area.')
+    parser.add_argument('-v', '--vector',
+                        help='Path to vector file.',
+                        required=False,
+                        default="")
+    parser.add_argument('-n', '--nlcd',
+                        help='Path to National Land Cover Database raster file.',
+                        required=False,
+                        default="")
+    parser.add_argument('-l', '--levees',
+                        help='Path to levees raster file.',
+                        required=False,
+                        default="")
+    parser.add_argument('-b', '--bridges',
+                        help='Path to bridges file.',
+                        required=False,
+                        default="")
+    parser.add_argument('-f', '--flood_extent',
+                        help='Path to flood extent file.',
+                        required=False,
+                        default="")
+    parser.add_argument('-c', '--csv',
+                        help='Path to export csv file.',
+                        required=True)
+    # Assign variables from arguments.
+    args = vars(parser.parse_args())
+    vector = args['vector']
+    nlcd = args['nlcd']
+    levees = args['levees']
+    bridges = args['bridges']
+    flood_extent = args['flood_extent']
+
+    csv = args['csv']
+
+    raster_path_dict = {'nlcd': nlcd, 'levees': levees, 'bridges': bridges, 'flood_extent': flood_extent}
+    stats = zonal_stats(vector, raster_path_dict)
+
+    # Export CSV
+    df = pd.DataFrame(stats)
+    result = df[(df >= 0).all(axis=1)]
+    df2 = pd.DataFrame(result)
+    df2.to_csv(csv, index=False)