Skip to content

Commit

Permalink
V4.0.13.1 fim4 alpha by catchments (#743)
Browse files Browse the repository at this point in the history
  • Loading branch information
CalebOliven-NOAA authored Dec 9, 2022
1 parent b8a209f commit 8a043d6
Show file tree
Hide file tree
Showing 5 changed files with 735 additions and 0 deletions.
13 changes: 13 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v4.0.13.1 - 2022-12-09 - [PR #743](https://github.com/NOAA-OWP/inundation-mapping/pull/743)

This merge adds the tools required to generate Alpha metrics by hydroid. It summarizes the Apha metrics by branch 0 catchment for use in the Hydrovis "FIM Performance" service.

## Additions

- `pixel_counter.py`: A script to perform zonal statistics against raster data and geometries
- `pixel_counter_functions.py`: Supporting functions
- `pixel_counter_wrapper.py`: a script that wraps `pixel_counter.py` for batch processing
- `test_case_by_hydroid.py`: the main script to orchestrate the generation of alpha metrics by catchment

<br/><br/>

## v4.0.13.0 - 2022-11-16 - [PR #744](https://github.com/NOAA-OWP/inundation-mapping/pull/744)

Changes branch 0 headwaters data source from NHD to NWS to be consistent with branches. Removes references to NHD flowlines and headwater data.
Expand Down
291 changes: 291 additions & 0 deletions tools/pixel_counter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
'''Created on 02/21/2022.
Written by:
Anuska Narayanan (The University of Alabama Department of Geography, [email protected];
Sophie Williams (The University of Alabama Department of Geography, [email protected]; and
Brad Bates (NOAA, Lynker, and the National Water Center, [email protected])
Derived from a Python version of a zonal statistics function written by Matthew Perry (@perrygeo).
Description: This script isolates the number of pixels per class of a raster within the outlines of
one or more polygons and displays them in a table. It accomplishes this by rasterizing the vector file,
masking out the desired areas of both rasters, and then summarizing them in a dataframe. It makes use
of the gdal, numpy, and pandas function libraries.
Inputs: one raster file with at least one set of attributes; one vector file containing one or more polygon
boundaries
Output: a dataframe table with rows displayed by each polygon within the vector file, and columns
displaying the pixel count of each raster attribute class in the polygon
'''

# Import raster and vector function libraries
#from types import NoneType
from osgeo import gdal, ogr
from osgeo.gdalconst import *
# Import numerical data library
import numpy as np
# Import file management library
import sys
import os
# Import data analysis library
import pandas as pd
import argparse
from pandas import DataFrame
import copy
import pathlib
import tempfile

from pixel_counter_functions import (get_nlcd_counts, get_levee_counts, get_bridge_counts, get_nlcd_counts_inside_flood,get_mask_value_counts)


# Set up error handler
gdal.PushErrorHandler('CPLQuietErrorHandler')

# Function to pologonize flood extent
def make_flood_extent_polygon(flood_extent):
flood_extent_dataset = gdal.Open(flood_extent)
cols = flood_extent_dataset.RasterXSize
rows = flood_extent_dataset.RasterYSize

# Get some metadata to filter out NaN values
flood_extent_raster = flood_extent_dataset.GetRasterBand(1)
noDataVal = flood_extent_raster.GetNoDataValue() # no data value
scaleFactor = flood_extent_raster.GetScale() # scale factor

# Assign flood_extent No Data Values to NaN
flood_extent_array = flood_extent_dataset.GetRasterBand(1).ReadAsArray(0, 0, cols, rows).astype(np.float)
flood_extent_array[flood_extent_array == int(noDataVal)] = np.nan
flood_extent_array = flood_extent_array / scaleFactor

# Assign flood_extent Negative Values to NaN
flood_extent_nonzero_array = copy.copy(flood_extent_array)
flood_extent_nonzero_array[flood_extent_array < 0] = np.nan

# make temporary output file
mem_drv = gdal.GetDriverByName('MEM')
target = mem_drv.Create('temp_tif', cols, rows, 1, gdal.GDT_Float32)
target.GetRasterBand(1).WriteArray(flood_extent_nonzero_array)

# Add GeoTranform and Projection
geotrans = flood_extent_dataset.GetGeoTransform()
proj = flood_extent_dataset.GetProjection()
target.SetGeoTransform(geotrans)
target.SetProjection(proj)
target.FlushCache()

# set up inputs for converting flood extent raster to polygon
band = target.GetRasterBand(1)
band.ReadAsArray()

outshape_location = tempfile.gettempdir()
outshape_location_path = os.path.abspath(outshape_location)
outShapefile = outshape_location_path + "/" + "polygonized.shp"
driver = ogr.GetDriverByName("ESRI Shapefile")
outDatasource = driver.CreateDataSource(outShapefile)
outLayer = outDatasource.CreateLayer("buffalo", srs=None)

# Add the DN field
newField = ogr.FieldDefn('HydroID', ogr.OFTInteger)
outLayer.CreateField(newField)

# Polygonize
gdal.Polygonize(band, None, outLayer, 0, [], callback=None)
outDatasource.Destroy()
sourceRaster = None

fullpath = os.path.abspath(outShapefile)
print(fullpath)
print(type(fullpath))

return fullpath
# Function that transforms vector dataset to raster
def bbox_to_pixel_offsets(gt, bbox):
originX = gt[0]
originY = gt[3]
pixel_width = gt[1]
pixel_height = gt[5]
x1 = int((bbox[0] - originX) / pixel_width)
x2 = int((bbox[1] - originX) / pixel_width) + 1

y1 = int((bbox[3] - originY) / pixel_height)
y2 = int((bbox[2] - originY) / pixel_height) + 1

xsize = x2 - x1
ysize = y2 - y1
return (x1, y1, xsize, ysize)


# Main function that determines zonal statistics of raster classes in a polygon area
def zonal_stats(vector_path, raster_path_dict, nodata_value=None, global_src_extent=False):

# Loop through different raster paths in the raster_path_dict and
# perform zonal statistics on the files.
for layer in raster_path_dict:
raster_path = raster_path_dict[layer]
if raster_path == "": # Only process if a raster path is provided
continue
if layer == 'flood_extent' and raster_path_dict["nlcd"]!= "":
vector_path = make_flood_extent_polygon(flood_extent)
raster_path = raster_path_dict["nlcd"]


# Opens raster file and sets path
rds = gdal.Open(raster_path, GA_ReadOnly)

assert rds
rb = rds.GetRasterBand(1)
rgt = rds.GetGeoTransform()

if nodata_value:
nodata_value = float(nodata_value)
rb.SetNoDataValue(nodata_value)
if vector_path == "":
print('No vector path provided. Continuing to next layer.')
continue
# Opens vector file and sets path

try:
vds = ogr.Open(vector_path)
vlyr = vds.GetLayer(0)
except:
continue

# Creates an in-memory numpy array of the source raster data covering the whole extent of the vector layer
if global_src_extent:
# use global source extent
# useful only when disk IO or raster scanning inefficiencies are your limiting factor
# advantage: reads raster data in one pass
# disadvantage: large vector extents may have big memory requirements
src_offset = bbox_to_pixel_offsets(rgt, vlyr.GetExtent())
src_array = rb.ReadAsArray(*src_offset)

# calculate new geotransform of the layer subset
new_gt = (
(rgt[0] + (src_offset[0] * rgt[1])),
rgt[1],
0.0,
(rgt[3] + (src_offset[1] * rgt[5])),
0.0,
rgt[5]
)

mem_drv = ogr.GetDriverByName('Memory')
driver = gdal.GetDriverByName('MEM')

# Loop through vectors, as many as exist in file
# Creates new list to contain their stats
stats = []
feat = vlyr.GetNextFeature()
while feat is not None:

if not global_src_extent:
# use local source extent
# fastest option when you have fast disks and well indexed raster (ie tiled Geotiff)
# advantage: each feature uses the smallest raster chunk
# disadvantage: lots of reads on the source raster
src_offset = bbox_to_pixel_offsets(rgt, feat.geometry().GetEnvelope())
src_array = rb.ReadAsArray(*src_offset)

# calculate new geotransform of the feature subset
new_gt = (
(rgt[0] + (src_offset[0] * rgt[1])),
rgt[1],
0.0,
(rgt[3] + (src_offset[1] * rgt[5])),
0.0,
rgt[5]
)

# Create a temporary vector layer in memory
mem_ds = mem_drv.CreateDataSource('out')
mem_layer = mem_ds.CreateLayer('poly', None, ogr.wkbPolygon)
mem_layer.CreateFeature(feat.Clone())

# Rasterize temporary vector layer
rvds = driver.Create('', src_offset[2], src_offset[3], 1, gdal.GDT_Byte)
rvds.SetGeoTransform(new_gt)
gdal.RasterizeLayer(rvds, [1], mem_layer, burn_values=[1])
rv_array = rvds.ReadAsArray()

# Mask the source data array with our current feature and get statistics (pixel count) of masked areas
# we take the logical_not to flip 0<->1 to get the correct mask effect
# we also mask out nodata values explictly
if src_array is None:
feat = vlyr.GetNextFeature()
continue
masked = np.ma.MaskedArray(
src_array,
mask=np.logical_or(
src_array == nodata_value,
np.logical_not(rv_array)
)
)

# Call different counter functions depending on the raster's source.
if layer == "nlcd":
feature_stats = get_nlcd_counts(feat, masked)
if layer == "agreement_raster":
feature_stats = get_mask_value_counts(feat, masked)
if layer == "levees":
feature_stats = get_levee_counts(feat, masked)
if layer == "bridges":
feature_stats = get_bridge_counts(feat, masked)
if layer == "flood_extent":
feature_stats = get_nlcd_counts_inside_flood(feat, masked)

stats.append(feature_stats)

rvds = None
mem_ds = None
feat = vlyr.GetNextFeature()

vds = None
rds = None
return stats


# Creates and prints dataframe containing desired statistics
if __name__ == "__main__":
# opts = {'VECTOR': sys.argv[1:], 'RASTER': sys.argv[2:]}
# stats = zonal_stats(opts['VECTOR'], opts['RASTER'])

parser = argparse.ArgumentParser(description='Computes pixel counts for raster classes within a vector area.')
parser.add_argument('-v', '--vector',
help='Path to vector file.',
required=False,
default="")
parser.add_argument('-n', '--nlcd',
help='Path to National Land Cover Database raster file.',
required=False,
default="")
parser.add_argument('-l', '--levees',
help='Path to levees raster file.',
required=False,
default="")
parser.add_argument('-b', '--bridges',
help='Path to bridges file.',
required=False,
default="")
parser.add_argument('-f', '--flood_extent',
help='Path to flood extent file.',
required=False,
default="")
parser.add_argument('-c', '--csv',
help='Path to export csv file.',
required=True)
# Assign variables from arguments.
args = vars(parser.parse_args())
vector = args['vector']
nlcd = args['nlcd']
levees = args['levees']
bridges = args['bridges']
flood_extent = args['flood_extent']

csv = args['csv']

raster_path_dict = {'nlcd': nlcd, 'levees': levees, 'bridges': bridges, 'flood_extent': flood_extent}
stats = zonal_stats(vector, raster_path_dict)

# Export CSV
df = pd.DataFrame(stats)
result = df[(df >= 0).all(axis=1)]
df2 = pd.DataFrame(result)
df2.to_csv(csv, index=False)
Loading

0 comments on commit 8a043d6

Please sign in to comment.