diff --git a/CHANGELOG.md b/CHANGELOG.md
index a12d1911c..808effc01 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,25 @@ All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
+## v3.0.19.1 - 2021-06-17 - [PR #417](https://github.com/NOAA-OWP/cahaba/pull/417)
+
+Adding a thalweg profile tool to identify significant drops in thalweg elevation. Also setting lateral thalweg adjustment threshold in hydroconditioning.
+
+## Additions
+- `thalweg_drop_check.py` checks the elevation along the thalweg for each stream path downstream of MS headwaters within a HUC.
+
+## Removals
+- Removing `dissolveLinks` arg from `clip_vectors_to_wbd.py`.
+
+
+## Changes
+- Cleaned up code in `split_flows.py` to make it more readable.
+- Refactored `reduce_nhd_stream_density.py` and `adjust_headwater_streams.py` to limit MS headwater points in `agg_nhd_headwaters_adj.gpkg`.
+- Fixed a bug in `adjust_thalweg_lateral.py` lateral elevation replacement threshold; changed threshold to 3 meters.
+- Updated `aggregate_vector_inputs.py` to log intermediate processes.
+
+
+
## v3.0.19.0 - 2021-06-10 - [PR #415](https://github.com/NOAA-OWP/cahaba/pull/415)
Feature to evaluate performance of alternative CatFIM techniques.
diff --git a/config/params_calibrated.env b/config/params_calibrated.env
index aa7aba1b0..3ca30650e 100644
--- a/config/params_calibrated.env
+++ b/config/params_calibrated.env
@@ -4,6 +4,7 @@
export negative_burn_value=1000
export agree_DEM_buffer=70
export wbd_buffer=5000
+export thalweg_lateral_elev_threshold=3
#### geospatial parameters ####
export max_split_distance_meters=1500
diff --git a/config/params_template.env b/config/params_template.env
index a998dc675..87270a669 100644
--- a/config/params_template.env
+++ b/config/params_template.env
@@ -4,6 +4,7 @@
export negative_burn_value=1000
export agree_DEM_buffer=70
export wbd_buffer=5000
+export thalweg_lateral_elev_threshold=3
#### geospatial parameters ####
export max_split_distance_meters=1500
diff --git a/src/adjust_headwater_streams.py b/src/adjust_headwater_streams.py
index 71f73186e..961d39d01 100644
--- a/src/adjust_headwater_streams.py
+++ b/src/adjust_headwater_streams.py
@@ -29,10 +29,10 @@ def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id):
nws_lid_limited = nws_lid_limited.loc[nws_lid_limited.nws_lid!='']
nws_lid_limited = nws_lid_limited.drop(columns=['nws_lid'])
- # Check for issues in nws_lid layer
- if len(nws_lid_limited) < len(nws_lids):
- missing_nws_lids = list(set(nws_lids.site_id) - set(nws_lid_limited.site_id))
- print (f"nws lid(s) {missing_nws_lids} missing from aggregate dataset in huc {huc}")
+ # Check for issues in nws_lid layer (now this reports back non-headwater nws_lids)
+ # if len(nws_lid_limited) < len(nws_lids):
+ # missing_nws_lids = list(set(nws_lids.site_id) - set(nws_lid_limited.site_id))
+ # print (f"nws lid(s) {missing_nws_lids} missing from aggregate dataset in huc {huc}")
# Combine NWM headwaters and AHPS sites to be snapped to NHDPlus HR segments
headwater_pts = headwater_limited.append(nws_lid_limited)
diff --git a/src/adjust_thalweg_lateral.py b/src/adjust_thalweg_lateral.py
index 90c8cecbf..47cd2e209 100755
--- a/src/adjust_thalweg_lateral.py
+++ b/src/adjust_thalweg_lateral.py
@@ -7,42 +7,43 @@
import numpy as np
@profile
-def adjust_thalweg_laterally(elevation_raster, stream_raster, allocation_raster, cost_distance_raster, cost_distance_tolerance, dem_lateral_thalweg_adj):
-
+def adjust_thalweg_laterally(elevation_raster, stream_raster, allocation_raster, cost_distance_raster, cost_distance_tolerance, dem_lateral_thalweg_adj,lateral_elevation_threshold):
+
# ------------------------------------------- Get catchment_min_dict --------------------------------------------------- #
# The following algorithm searches for the zonal minimum elevation in each pixel catchment
# It updates the catchment_min_dict with this zonal minimum elevation value.
@njit
def make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window, cost_tolerance, ndv):
- for i,cm in enumerate(zone_window):
+ for i,elev_m in enumerate(zone_window):
# If the zone really exists in the dictionary, compare elevation values.
i = int(i)
- cm = int(cm)
-
+ elev_m = int(elev_m)
+
if (cost_window[i] <= cost_tolerance):
if elevation_window[i] > 0: # Don't allow bad elevation values
- if (cm in zone_min_dict):
-
- if (elevation_window[i] < zone_min_dict[cm]):
+ if (elev_m in zone_min_dict):
+
+ if (elevation_window[i] < zone_min_dict[elev_m]):
# If the elevation_window's elevation value is less than the zone_min_dict min, update the zone_min_dict min.
- zone_min_dict[cm] = elevation_window[i]
+ zone_min_dict[elev_m] = elevation_window[i]
else:
- zone_min_dict[cm] = elevation_window[i]
+ zone_min_dict[elev_m] = elevation_window[i]
+
return(zone_min_dict)
-
+
# Open the masked gw_catchments_pixels_masked and dem_thalwegCond_masked.
elevation_raster_object = rasterio.open(elevation_raster)
allocation_zone_raster_object = rasterio.open(allocation_raster)
cost_distance_raster_object = rasterio.open(cost_distance_raster)
-
+
meta = elevation_raster_object.meta.copy()
meta['tiled'], meta['compress'] = True, 'lzw'
-
+
# -- Create zone_min_dict -- #
print("Create zone_min_dict")
zone_min_dict = typed.Dict.empty(types.int32,types.float32) # Initialize an empty dictionary to store the catchment minimums.
# Update catchment_min_dict with pixel sheds minimum.
-
+
for ji, window in elevation_raster_object.block_windows(1): # Iterate over windows, using elevation_raster_object as template.
elevation_window = elevation_raster_object.read(1,window=window).ravel() # Define elevation_window.
zone_window = allocation_zone_raster_object.read(1,window=window).ravel() # Define zone_window.
@@ -50,72 +51,69 @@ def make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window
# Call numba-optimized function to update catchment_min_dict with pixel sheds minimum.
zone_min_dict = make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window, int(cost_distance_tolerance), meta['nodata'])
-
- # ------------------------------------------------------------------------------------------------------------------------ #
-
+
+ # ------------------------------------------------------------------------------------------------------------------------ #
+
elevation_raster_object.close()
allocation_zone_raster_object.close()
cost_distance_raster_object.close()
-
+
# ------------------------------------------- Assign zonal min to thalweg ------------------------------------------------ #
@njit
def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_window):
-
+
# Copy elevation values into new array that will store the minimized elevation values.
dem_window_to_return = np.empty_like (dem_window)
dem_window_to_return[:] = dem_window
-
- for i,cm in enumerate(zone_window):
+
+ for i,elev_m in enumerate(zone_window):
i = int(i)
- cm = int(cm)
+ elev_m = int(elev_m)
thalweg_cell = thalweg_window[i] # From flows_grid_boolean.tif (0s and 1s)
if thalweg_cell == 1: # Make sure thalweg cells are checked.
- if cm in zone_min_dict:
- zone_min_elevation = zone_min_dict[cm]
+ if elev_m in zone_min_dict:
+ zone_min_elevation = zone_min_dict[elev_m]
dem_thalweg_elevation = dem_window[i]
-
- elevation_difference = zone_min_elevation - dem_thalweg_elevation
-
- if zone_min_elevation < dem_thalweg_elevation and elevation_difference <= 5:
+
+ elevation_difference = dem_thalweg_elevation - zone_min_elevation
+
+ if (zone_min_elevation < dem_thalweg_elevation) and (elevation_difference <= lateral_elevation_threshold):
dem_window_to_return[i] = zone_min_elevation
return(dem_window_to_return)
-
+
# Specify raster object metadata.
elevation_raster_object = rasterio.open(elevation_raster)
allocation_zone_raster_object = rasterio.open(allocation_raster)
thalweg_object = rasterio.open(stream_raster)
-
+
dem_lateral_thalweg_adj_object = rasterio.open(dem_lateral_thalweg_adj, 'w', **meta)
-
+
for ji, window in elevation_raster_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template.
dem_window = elevation_raster_object.read(1,window=window) # Define dem_window.
window_shape = dem_window.shape
dem_window = dem_window.ravel()
-
+
zone_window = allocation_zone_raster_object.read(1,window=window).ravel() # Define catchments_window.
thalweg_window = thalweg_object.read(1,window=window).ravel() # Define thalweg_window.
-
+
# Call numba-optimized function to reassign thalweg cell values to catchment minimum value.
minimized_dem_window = minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_window)
minimized_dem_window = minimized_dem_window.reshape(window_shape).astype(np.float32)
- dem_lateral_thalweg_adj_object.write(minimized_dem_window, window=window, indexes=1)
-
+ dem_lateral_thalweg_adj_object.write(minimized_dem_window, window=window, indexes=1)
+
elevation_raster_object.close()
allocation_zone_raster_object.close()
cost_distance_raster_object.close()
-
- # Delete allocation_raster and distance_raster.
-
-
-
+
+
if __name__ == '__main__':
-
+
# Parse arguments.
parser = argparse.ArgumentParser(description='Adjusts the elevation of the thalweg to the lateral zonal minimum.')
parser.add_argument('-e','--elevation_raster',help='Raster of elevation.',required=True)
@@ -124,11 +122,9 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_w
parser.add_argument('-d','--cost_distance_raster',help='Raster of cost distances for the allocation raster.',required=True)
parser.add_argument('-t','--cost_distance_tolerance',help='Tolerance in meters to use when searching for zonal minimum.',required=True)
parser.add_argument('-o','--dem_lateral_thalweg_adj',help='Output elevation raster with adjusted thalweg.',required=True)
-
+ parser.add_argument('-th','--lateral_elevation_threshold',help='Maximum difference between current thalweg elevation and lowest lateral elevation in meters.',required=True,type=int)
+
# Extract to dictionary and assign to variables.
args = vars(parser.parse_args())
-
+
adjust_thalweg_laterally(**args)
-
-
-
diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py
index 2c1081989..3675b88b8 100755
--- a/src/aggregate_vector_inputs.py
+++ b/src/aggregate_vector_inputs.py
@@ -89,6 +89,7 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit):
# Input wbd
if isinstance(wbd,str):
+
layer = f"WBDHU{huc_unit}"
wbd = gpd.read_file(wbd, layer=layer)
elif isinstance(wbd,gpd.GeoDataFrame):
@@ -175,6 +176,7 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit):
huc_intersection = gpd.GeoDataFrame({'geometry': intersecting_points, 'NHDPlusID': nhdplus_ids,'mainstem': mainstem_flag},crs=nwm_streams.crs,geometry='geometry')
huc_intersection = huc_intersection.drop_duplicates()
+
del nwm_streams,wbd
return huc_intersection
@@ -182,7 +184,8 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit):
def collect_stream_attributes(nhdplus_vectors_dir, huc):
- print ('Starting huc: ' + str(huc))
+ print (f"Starting attribute collection for HUC {huc}",flush=True)
+
# Collecting NHDPlus HR attributes
burnline_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg')
vaa_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusFlowLineVAA' + str(huc) + '.gpkg')
@@ -214,10 +217,10 @@ def collect_stream_attributes(nhdplus_vectors_dir, huc):
nhd_streams.to_file(nhd_streams_agg_fileName,driver=getDriver(nhd_streams_agg_fileName),index=False)
del nhd_streams
- print ('finished huc: ' + str(huc))
+ print (f"finished attribute collection for HUC {huc}",flush=True)
else:
- print ('missing data for huc ' + str(huc))
+ print (f"missing data for HUC {huc}",flush=True)
def subset_stream_networks(args, huc):
@@ -229,10 +232,11 @@ def subset_stream_networks(args, huc):
nhdplus_vectors_dir = args[4]
nwm_huc4_intersections_filename = args[5]
- print("starting HUC " + str(huc),flush=True)
+ print(f"starting stream subset for HUC {huc}",flush=True)
nwm_headwater_id = 'ID'
ahps_headwater_id = 'nws_lid'
headwater_pts_id = 'site_id'
+
column_order = ['pt_type', headwater_pts_id, 'mainstem', 'geometry']
nhd_streams_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg')
@@ -247,13 +251,20 @@ def subset_stream_networks(args, huc):
huc_mask = huc_mask.reset_index(drop=True)
if len(selected_wbd8.HUC8) > 0:
+
selected_wbd8 = selected_wbd8.reset_index(drop=True)
# Identify FR/NWM headwaters and subset HR network
- nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersections_filename)
+ try:
+ nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersections_filename)
+ except:
+ print (f"Error subsetting NHD HR network for HUC {huc}",flush=True)
# Identify nhd mainstem streams
- nhd_streams_all = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_fr,ahps_filename,ahps_headwater_id,nwm_huc4_intersections_filename,True)
+ try:
+ nhd_streams_all = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_fr,ahps_filename,ahps_headwater_id,nwm_huc4_intersections_filename,True)
+ except:
+            print (f"Error identifying MS network for HUC {huc}",flush=True)
# Identify HUC8 intersection points
nhd_huc8_intersections = find_nwm_incoming_streams(nhd_streams_all,selected_wbd8,8)
@@ -265,17 +276,17 @@ def subset_stream_networks(args, huc):
# Load nws lids
nws_lids = gpd.read_file(ahps_filename, mask=huc_mask)
- nws_lids = nws_lids.drop(columns=['name','nwm_featur'])
+ nws_lids = nws_lids.drop(columns=['name','nwm_feature_id','usgs_site_code','states','HUC8','is_headwater', 'is_colocated'])
nws_lids = nws_lids.rename(columns={"nws_lid": headwater_pts_id})
nws_lids['pt_type'] = 'nws_lid'
nws_lids['mainstem'] = True
if (len(nwm_headwaters) > 0) or (len(nws_lids) > 0):
+
# Adjust FR/NWM headwater segments
adj_nhd_streams_all, adj_nhd_headwater_points = adjust_headwaters(huc,nhd_streams_all,nwm_headwaters,nws_lids,headwater_pts_id)
adj_nhd_headwater_points = adj_nhd_headwater_points[column_order]
-
nhd_huc8_intersections['pt_type'] = 'nhd_huc8_intersections'
nhd_huc8_intersections = nhd_huc8_intersections.rename(columns={"NHDPlusID": headwater_pts_id})
nhd_huc8_intersections = nhd_huc8_intersections[column_order]
@@ -290,11 +301,14 @@ def subset_stream_networks(args, huc):
adj_nhd_headwater_points_all.to_file(adj_nhd_headwaters_all_fileName,driver=getDriver(adj_nhd_headwaters_all_fileName),index=False)
del adj_nhd_streams_all, adj_nhd_headwater_points_all
+
else:
- print (f"skipping headwater adjustments for HUC: {huc}")
+
+ print (f"skipping headwater adjustments for HUC {huc}")
del nhd_streams_fr
+ print(f"finished stream subset for HUC {huc}",flush=True)
def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list):
@@ -330,6 +344,7 @@ def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileNam
def clean_up_intermediate_files(nhdplus_vectors_dir):
for huc in os.listdir(nhdplus_vectors_dir):
+
agg_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg')
streams_adj_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg')
headwater_adj_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg')
@@ -385,18 +400,19 @@ def clean_up_intermediate_files(nhdplus_vectors_dir):
if not os.path.isfile(streams_adj_path):
missing_subsets = missing_subsets + [huc]
- print (f"running subset_results on {len(missing_subsets)} HUC4s")
+ print (f"Subsetting stream network for {len(missing_subsets)} HUC4s")
num_workers=11
with ProcessPoolExecutor(max_workers=num_workers) as executor:
# Preprocess nhd hr and add attributes
- # collect_attributes = [executor.submit(collect_stream_attributes, nhdplus_vectors_dir, str(huc)) for huc in huc_list]
+ collect_attributes = [executor.submit(collect_stream_attributes, nhdplus_vectors_dir, str(huc)) for huc in huc_list]
# Subset nhd hr network
subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in missing_subsets]
- # del wbd4,wbd8
+ del wbd4,wbd8
- # Aggregate fr and ms nhd netowrks for entire nwm domain
+ # Aggregate subset nhd networks for entire nwm domain
+ print ('Aggregating subset NHD networks for entire NWM domain')
aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list)
# Remove intermediate files
diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py
index ef7ff4837..65fd72d20 100755
--- a/src/clip_vectors_to_wbd.py
+++ b/src/clip_vectors_to_wbd.py
@@ -8,7 +8,8 @@
from utils.shared_functions import getDriver
@profile
-def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance,dissolveLinks=False):
+def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance):
+
hucUnitLength = len(str(hucCode))
# Get wbd buffer
@@ -88,6 +89,7 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l
nhd_streams = nhd_streams.loc[nhd_streams.mainstem==1]
if len(nhd_streams) > 0:
+
# Find incoming stream segments (to WBD buffer) and identify which are upstream
threshold_segments = gpd.overlay(nhd_streams, wbd_buffer, how='symmetric_difference')
from_list = threshold_segments.FromNode.to_list()
@@ -148,8 +150,6 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l
parser.add_argument('-gl','--great-lakes-filename',help='Great Lakes layer',required=True)
parser.add_argument('-wb','--wbd-buffer-distance',help='WBD Mask buffer distance',required=True,type=int)
parser.add_argument('-lb','--lake-buffer-distance',help='Great Lakes Mask buffer distance',required=True,type=int)
- parser.add_argument('-o','--dissolve-links',help='remove multi-line strings',action="store_true",default=False)
-
args = vars(parser.parse_args())
@@ -174,6 +174,5 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l
great_lakes_filename = args['great_lakes_filename']
wbd_buffer_distance = args['wbd_buffer_distance']
lake_buffer_distance = args['lake_buffer_distance']
- dissolveLinks = args['dissolve_links']
- subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance,dissolveLinks)
\ No newline at end of file
+ subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance)
diff --git a/src/output_cleanup.py b/src/output_cleanup.py
index 19d74abc8..d73a2deee 100755
--- a/src/output_cleanup.py
+++ b/src/output_cleanup.py
@@ -39,7 +39,7 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod
'bathy_xs_area_hydroid_lookup.csv',
'src_full_crosswalked.csv',
'usgs_elev_table.csv',
- 'hand_ref_elev_table.csv'
+ 'hand_ref_elev_table.csv',
]
# List of files that will be saved during a viz run
diff --git a/src/raster.py b/src/raster.py
deleted file mode 100644
index a10a02430..000000000
--- a/src/raster.py
+++ /dev/null
@@ -1,462 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-from osgeo import gdal, ogr, osr
-import numpy as np
-from os.path import isfile
-from os import remove
-from copy import deepcopy
-from subprocess import call
-
-class Raster:
-
- """
- Raster object from single band rasters
-
- ...
-
- Attributes
- ----------
- array : numpy array
- raster data in numpy array
- gt : list
- geotransform. see gdal docs for more info.
- proj : str
- Projection string
- ndv : number
- No data value
- des : str
- band description
- ct : gdal.colorTable
- color table
- dt : int
- GDAL GDT data type. See notes.
- dim : tuple
- raster dimensions (bands, rows, columns) for multi-bands and (row, columns) for single band
- nbands : int
- number of bands.
- nrows : int
- number of rows
- ncols : int
- number of columns
-
- Methods
- -------
- writeRaster(fileName,dtype=None,driverName='GTiff',verbose=False)
- Write out raster file as geotiff
- copy()
- Copy method. Uses deepcopy since array data is present
- clipToVector(raster_fileName,vector_fileName,verbose=False,output_fileType='GTiff',output_fileName=None,loadOutput=True)
- Clips to vector using gdalwarp command line utility
-
- Raises
- ------
- OSError
- If fileName does not exist
- ValueError
- Raises if input raster
-
- See Also
- --------
-
- Notes
- -----
- Currently only accepts single band rasters.
-
- Multiple datatypes are used. The table below shows which numpy datatypes correspond to the the GDAL types and their integer codes.
-
- # ## Integer Code ## ## Global Descriptor Table ## ## Numpy ##
- # 0 GDT_Unknown NA
- # 1 GDT_Byte np.bool, np.int ,np.int8, np.long, np.byte, np.uint8
- # 2 GDT_UInt16 np.uint16, np.ushort
- # 3 GDT_Int16 np.int16, np.short
- # 4 GDT_UInt32 np.uint32 , np.uintc
- # 5 GDT_Int32 np.int32, np.intc
- # 6 GDT_Float32 np.float32, np.single
- # 7 GDT_Float64 np.float64, np.double
- # 8 GDT_CInt16 np.complex64
- # 9 GDT_CInt32 np.complex64
- # 10 GDT_CFloat32 np.complex64
- # 11 GDT_CFloat64 np.complex128
- # 12 GDT_TypeCount NA
-
- Examples
- --------
- Load Raster
- >>> rasterData = fldpln.Raster('path/to/raster')
-
- """
-
- # converts numpy datatypes and gdal GDT variables to integer codes
- dataTypeConversion_name_to_integer = { np.int8 : 1 , np.bool : 1 , np.int : 1 , np.long : 1 , np.byte : 1, np.uint8 : 1,
- np.uint16 : 2 , np.int16 : 3 ,
- np.ushort : 2 , np.short : 3 ,
- np.uint32 : 4 , np.uintc : 4 , np.int32 : 5 , np.intc : 5 ,
- np.float32 : 6 , np.single : 6 ,
- np.float64 : 7 , np.double : 7 ,
- np.complex64 : 10 , np.complex128 : 11 ,
- 0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7,8:8,9:9,10:10,11:11,12:12 }
-
- # converts integer codes and gdal GDT variables to numpy datatypes
- dataTypeConversion_integer_to_name = {0 : np.complex128 , 1 : np.int8 , 2 : np.uint16 , 3 : np.int16 ,
- 4 : np.uint32 , 5 : np.int32 , 6 : np.float32 , 7 : np.float64 ,
- 8 : np.complex64 , 9 : np.complex64 , 10 : np.complex64 , 11 : np.complex128 }
-
-
- def __init__(self,fileName,loadArray=True,dtype=None):
-
- """
- Initializes Raster Instance from single band raster
-
- ...
-
- Parameters
- ----------
- fileName : str
- File path to single band raster
- dtype : numpy datatype or int, optional
- Numpy, GDT, or integer code data type used to override the data type on the file when imported to array (Default Value = None, None sets to the numpy array data type to the one in the raster file)
-
- Returns
- -------
- raster
- Instance of raster object
-
- """
-
- if not isfile(fileName):
- raise OSError("File \'{}\' does not exist".format(fileName))
-
- stream = gdal.Open(fileName,gdal.GA_ReadOnly)
-
- self.nrows,self.ncols = stream.RasterYSize , stream.RasterXSize
- self.nbands = stream.RasterCount
-
- if loadArray:
- self.array = stream.ReadAsArray()
-
- self.gt = stream.GetGeoTransform()
- self.proj = stream.GetProjection()
-
- # if self.nbands > 1:
- # raise ValueError('Raster class only accepts single band rasters for now')
-
- band = stream.GetRasterBand(1)
-
- self.ndv = band.GetNoDataValue()
-
- # set data type
- if dtype is not None: # override raster file type
-
- # sets dt to dtype integer code
- try:
- self.dt = self.dataTypeConversion_name_to_integer[dtype]
- except KeyError:
- raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from raster'.format(dtype))
-
- # sets array data type
- if isinstance(dtype,type): # if dtype is a numpy data tpe
-
- self.array = self.array.astype(dtype)
-
- else: # if dtype is an integer code of GDAL GDT variable
-
- try:
- self.array = self.array.astype(self.dataTypeConversion_integer_to_name[dtype])
- except KeyError:
- raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from raster'.format(dtype))
-
- else: # sets to default data type in raster file
-
- self.dt = band.DataType
-
- try:
- self.array.astype(self.dataTypeConversion_integer_to_name[self.dt])
- except KeyError:
- raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from raster'.format(self.dt))
-
- try:
- self.des = band.GetDescription()
- except AttributeError:
- pass
-
- try:
- self.ct = stream.GetRasterColorTable()
- except AttributeError:
- pass
-
- # self.dim = self.array.shape
- self.fileName = fileName
-
- stream,band = None,None
-
-
- @property
- def dim(self):
- """ Property method for number of dimensions """
-
- if self.nbands == 1:
- DIMS = self.nrows,self.ncols
- if self.nbands > 1:
- DIMS = self.nbands,self.nrows,self.ncols
-
- return(DIMS)
-
-
- def copy(self):
- """ Copy method. Uses deepcopy since array data is present """
- return(deepcopy(self))
-
-
- def writeRaster(self,fileName,dtype=None,driverName='GTiff',verbose=False):
-
- """
- Write out raster file as geotiff
-
- Parameters
- ----------
- fileName : str
- File path to output raster to
- dtype : numpy datatype or int, optional
- Numpy, GDT, or integer code data type (Default Value = self.dt attribute value, otherwise uses data type from the numpy array)
- driverName : str, optional
- GDAL driver type. See gdal docs for more details. Only tested for GTiff. (Default Value = 'GTiff')
- verbose : Boolean, optional
- Verbose output (Default Value = False)
-
- Returns
- -------
- None
-
- Raises
- ------
- ValueError
- Raises ValueError when the data type parameter is not recognized. See the help docs for raster class to see which numpy, gdal, or encoded values are accepted.
-
- Examples
- --------
- Write Geotiff raster
- >>> rasterData = fldpln.Raster('path/to/raster')
- >>> rasterData.writeRaster('/different/path/to/raster',dtype=np.int8)
-
- """
-
- driver = gdal.GetDriverByName(driverName)
-
- if dtype is None:
- try:
- dtype = self.dt
- except AttributeError:
- # dtype = gdal.GDT_Float64
- try:
- dtype = self.dataTypeConversion_name_to_integer[self.array.dtype]
- except KeyError:
- raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from numpy array'.format(self.array.dtype))
- else:
- try:
- dtype = self.dataTypeConversion_name_to_integer[dtype]
- except KeyError:
- raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from numpy array'.format(self.array.dtype))
-
- dataset = driver.Create(fileName, self.ncols, self.nrows, 1, dtype)
- dataset.SetGeoTransform(self.gt)
- dataset.SetProjection(self.proj)
- band = dataset.GetRasterBand(1)
-
- # set color table and color interpretation
- #print(band.__dict__)
- try:
- band.SetRasterColorTable(self.ct)
- #band.SetRasterColorInterpretation(gdal.GCI_PaletteIndex)
- except AttributeError:
- pass
-
- try:
- band.SetDescription(self.des)
- except AttributeError:
- pass
-
- band.SetNoDataValue(self.ndv)
- band.WriteArray(self.array)
- band, dataset = None,None # Close the file
-
- if verbose:
- print("Successfully wrote out raster to {}".format(fileName))
-
- def polygonize(self,vector_fileName,vector_driver,layer_name,verbose):
-
- gdal.UseExceptions()
-
- # get raster datasource
- #
- src_ds = gdal.Open( self.fileName )
- srcband = src_ds.GetRasterBand(1)
-
- #
- # create output datasource
- driver_ext_dict = {'ESRI Shapefile' : 'shp' , 'GPKG' : 'gpkg'}
-
- if vector_driver not in driver_ext_dict:
- raise ValueError('Driver not found in {}'.format(driver_ext_dict))
-
- drv = ogr.GetDriverByName(vector_driver)
- dst_ds = drv.CreateDataSource( vector_fileName)
-
- srs = osr.SpatialReference()
- srs.ImportFromWkt(self.proj)
-
- dst_layer = dst_ds.CreateLayer(layer_name, srs = srs, geom_type = ogr.wkbPolygon )
-
- if verbose:
- prog_func = gdal.TermProgress_nocb
- else:
- prog_func = None
-
- gdal.Polygonize( srcband, None, dst_layer, -1, ['8CONNECTED=8'], callback=prog_func )
-
- @classmethod
- def clipToVector(cls,raster_fileName,vector_fileName,output_fileName=None,output_fileType='GTiff',verbose=False):
- """
- Clips to vector using gdalwarp command line utility
-
- ...
-
- Parameters
- ----------
- raster_fileName : str
- File path to raster to clip
- vector_fileName : str
- File path to vector layer to clip with
- output_fileName : str
- Set file path to output clipped raster (Default Value = None)
- output_fileType : str
- Set file type of output from GDAL drivers list (Default Value = 'GTiff')
- verbose : Boolean
- Verbose output (Default Value = False)
-
- Returns
- -------
- raster : raster
- Clipped raster layer
-
- Notes
- -----
- gdalwarp utility must be installed and callable via a subprocess
-
- Examples
- --------
- clip raster and don't return
- >>> fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector','path/to/write/output/raster/to')
- Clip raster and return but don't write
- >>> clippedRaster = fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector')
-
-
- """
-
- # create temp output if none is desired
- if output_fileName is None:
- output_fileName = 'temp.tif'
-
- # generate command
- command = ['gdalwarp','-overwrite','-of',output_fileType,'-cutline',vector_fileName,'-crop_to_cutline',raster_fileName,output_fileName]
-
- # insert quiet flag if not verbose
- if not verbose:
- command = command.insert(1,'-q')
-
- # call command
- call(command)
-
- # remove temp file
- if output_fileName is None:
- remove(output_fileName)
-
- return(cls(output_fileName))
-
- def getCoordinatesFromIndex(self,row,col):
- """
- Returns coordinates in the rasters projection from a given multi-index
-
- """
-
- # extract variables for readability
- x_upper_limit, y_upper_limit = self.gt[0], self.gt[3]
- x_resolution, y_resolution = self.gt[1], self.gt[5]
- nrows, ncols = self.nrows, self.ncols
-
- x = x_upper_limit + (col * x_resolution)
- y = y_upper_limit + (row * y_resolution)
-
- return(x,y)
-
-
- def sampleFromCoordinates(self,x,y,returns='value'):
- """
- Sample raster value from coordinates
- ...
-
- Parameters
- ----------
- raster_fileName : str
- File path to raster to clip
- vector_fileName : str
- File path to vector layer to clip with
- output_fileName : str
- Set file path to output clipped raster (Default Value = None)
- output_fileType : str
- Set file type of output from GDAL drivers list (Default Value = 'GTiff')
- verbose : Boolean
- Verbose output (Default Value = False)
-
- Returns
- -------
- raster : raster
- Clipped raster layer
-
- Notes
- -----
- gdalwarp utility must be installed and callable via a subprocess
-
- Examples
- --------
- clip raster and don't return
- >>> fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector','path/to/write/output/raster/to')
- Clip raster and return but don't write
- >>> clippedRaster = fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector')
-
-
- """
-
- # extract variables for readability
- x_upper_limit, y_upper_limit = self.gt[0], self.gt[3]
- x_resolution, y_resolution = self.gt[1], self.gt[5]
- nrows, ncols = self.nrows, self.ncols
-
- # get upper left hand corner coordinates from the centroid coordinates of the upper left pixel
- x_upper_limit = x_upper_limit - (x_resolution/2)
- y_upper_limit = y_upper_limit - (y_resolution/2)
-
- # get indices
- columnIndex = int( ( x - x_upper_limit) / x_resolution)
- rowIndex = int( ( y - y_upper_limit) / y_resolution)
-
- # check indices lie within raster limits
- columnIndexInRange = ncols > columnIndex >= 0
- rowIndexInRange = nrows > rowIndex >= 0
-
- if (not columnIndexInRange) | (not rowIndexInRange):
- raise ValueError("Row Index {} or column index {} not in raster range ({},{})".format(rowIndex,columnIndex,nrows,ncols))
-
- # check value is not ndv
- if self.array[rowIndex,columnIndex] == self.ndv:
- raise ValueError("Sample value is no data at ({},{})".format(nrows,ncols))
-
- # return if statements
- if returns == 'value':
- return(self.array[rowIndex,columnIndex])
- elif returns == 'multi-index':
- return(rowIndex,columnIndex)
- elif returns == 'ravel-index':
- return(np.ravel_multi_index((rowIndex,columnIndex),(nrows,ncols)))
- else:
- raise ValueError('Enter valid returns argument')
diff --git a/src/reachID_grid_to_vector_points.py b/src/reachID_grid_to_vector_points.py
index 5dadd43c1..c77bcc732 100755
--- a/src/reachID_grid_to_vector_points.py
+++ b/src/reachID_grid_to_vector_points.py
@@ -1,17 +1,15 @@
#!/usr/bin/env python3
-# -*- coding: utf-8
-from osgeo import gdal
import numpy as np
import osgeo.ogr
import osgeo.osr
import sys
-
-import cProfile
+import argparse
from tqdm import tqdm
import geopandas as gpd
+from utils.shared_variables import PREP_PROJECTION
from shapely.geometry import Point
-from raster import Raster
+import rasterio
from utils.shared_functions import getDriver
"""
@@ -19,67 +17,55 @@
./reachID_grid_to_vector_points.py
"""
+def convert_grid_cells_to_points(raster,index_option,output_points_filename=False):
-path = sys.argv[1]
-outputFileName = sys.argv[2]
-writeOption = sys.argv[3]
-
-#r = gdal.Open(path)
-#band = r.GetRasterBand(1)
-boolean=Raster(path)
-
-#(upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = r.GetGeoTransform()
-(upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = boolean.gt
-
-#a = band.ReadAsArray().astype(np.float)
-
-# indices = np.nonzero(a != band.GetNoDataValue())
-indices = np.nonzero(boolean.array >= 1)
+ # Input raster
+ if isinstance(raster,str):
+ raster = rasterio.open(raster,'r')
-# Init the shapefile stuff..
-#srs = osgeo.osr.SpatialReference()
-#srs.ImportFromWkt(r.GetProjection())
+ elif isinstance(raster,rasterio.io.DatasetReader):
+ pass
-#driver = osgeo.ogr.GetDriverByName('GPKG')
-#shapeData = driver.CreateDataSource(outputFileName)
+ else:
+ raise TypeError("Pass raster dataset or filepath for raster")
-#layer = shapeData.CreateLayer('ogr_pts', srs, osgeo.ogr.wkbPoint)
-#layerDefinition = layer.GetLayerDefn()
+ (upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = raster.get_transform()
+ indices = np.nonzero(raster.read(1) >= 1)
-#idField = osgeo.ogr.FieldDefn("id", osgeo.ogr.OFTInteger)
-#layer.CreateField(idField)
+ id =[None] * len(indices[0]);points = [None]*len(indices[0])
-id =[None] * len(indices[0]);points = [None]*len(indices[0])
+ # Iterate over the Numpy points..
+ i = 1
+ for y_index,x_index in zip(*indices):
+ x = x_index * x_size + upper_left_x + (x_size / 2) # add half the cell size
+ y = y_index * y_size + upper_left_y + (y_size / 2) # to center the point
+ points[i-1] = Point(x,y)
+ if index_option == 'reachID':
+ reachID = np.array(list(raster.sample((Point(x,y).coords), indexes=1))).item() # check this; needs to add raster cell value + index
+ id[i-1] = reachID*10000 + i #reachID + i/100
+ elif (index_option == 'featureID') |(index_option == 'pixelID'):
+ id[i-1] = i
+ i += 1
-# Iterate over the Numpy points..
-i = 1
-for y_index,x_index in tqdm(zip(*indices),total=len(indices[0])):
- x = x_index * x_size + upper_left_x + (x_size / 2) #add half the cell size
- y = y_index * y_size + upper_left_y + (y_size / 2) #to centre the point
+ pointGDF = gpd.GeoDataFrame({'id' : id, 'geometry' : points},crs=PREP_PROJECTION,geometry='geometry')
- # get raster value
- #reachID = a[y_index,x_index]
+ if output_points_filename == False:
+ return pointGDF
+ else:
+ pointGDF.to_file(output_points_filename,driver=getDriver(output_points_filename),index=False)
- #point = osgeo.ogr.Geometry(osgeo.ogr.wkbPoint)
- #point.SetPoint(0, x, y)
- points[i-1] = Point(x,y)
+if __name__ == '__main__':
- #feature = osgeo.ogr.Feature(layerDefinition)
- #feature.SetGeometry(point)
- #feature.SetFID(i)
- if writeOption == 'reachID':
- reachID = a[y_index,x_index]
- id[i-1] = reachID
- #feature.SetField("id",reachID)
- elif (writeOption == 'featureID') |( writeOption == 'pixelID'):
- #feature.SetField("id",i)
- id[i-1] = i
- #layer.CreateFeature(feature)
+ # Parse arguments
+ parser = argparse.ArgumentParser(description='Converts a raster to points')
+ parser.add_argument('-r','--raster',help='Raster to be converted to points',required=True,type=str)
+ parser.add_argument('-i', '--index-option',help='Indexing option',required=True,type=str,choices=['reachID','featureID','pixelID'])
+ parser.add_argument('-p', '--output-points-filename',help='Output points layer filename',required=False,type=str,default=False)
- i += 1
+ args = vars(parser.parse_args())
-pointGDF = gpd.GeoDataFrame({'id' : id, 'geometry' : points},crs=boolean.proj,geometry='geometry')
-pointGDF.to_file(outputFileName,driver=getDriver(outputFileName),index=False)
+ raster = args['raster']
+ index_option = args['index_option']
+ output_points_filename = args['output_points_filename']
-print("Complete")
-#shapeData.Destroy()
+ convert_grid_cells_to_points(raster,index_option,output_points_filename)
diff --git a/src/reduce_nhd_stream_density.py b/src/reduce_nhd_stream_density.py
index 9614bfe32..c32afd990 100644
--- a/src/reduce_nhd_stream_density.py
+++ b/src/reduce_nhd_stream_density.py
@@ -7,6 +7,7 @@
import argparse
import pygeos
from shapely.wkb import dumps
+from shapely.geometry import Point
from utils.shared_functions import getDriver
@@ -29,7 +30,7 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_file
for index, row in selected_wbd8.iterrows():
huc = row["HUC8"]
- # Double check that this is a nested HUC (probably overkill)
+ # Double check that this is a nested HUC
if huc.startswith(str(huc4)):
huc8_mask = selected_wbd8.loc[selected_wbd8.HUC8==huc]
@@ -44,6 +45,9 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_file
streams_subset = gpd.read_file(nhd_streams_, mask = huc8_mask)
else:
streams_subset = nhd_streams.loc[nhd_streams.HUC8==huc].copy()
+ if headwaters_mask.is_headwater.dtype != 'int': headwaters_mask.is_headwater = headwaters_mask.is_headwater.astype('int')
+ if headwaters_mask.is_colocated.dtype != 'int': headwaters_mask.is_colocated = headwaters_mask.is_colocated.astype('int')
+ headwaters_mask = headwaters_mask.loc[headwaters_mask.is_headwater==True]
if not streams_subset.empty:
streams_subset[headwater_col] = False
@@ -63,14 +67,14 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_file
# Add headwaters_id column
streams_subset[id_col] = n
- # Find stream segment closest to headwater point
+ distance_from_upstream = {}
for index, point in headwaters_mask.iterrows():
# Convert headwaterpoint geometries to WKB representation
- wkb_points = dumps(point.geometry)
+ wkb_point = dumps(point.geometry)
# Create pygeos headwaterpoint geometries from WKB representation
- pointbin_geom = pygeos.io.from_wkb(wkb_points)
+ pointbin_geom = pygeos.io.from_wkb(wkb_point)
# Distance to each stream segment
distances = pygeos.measurement.distance(streambin_geom, pointbin_geom)
@@ -78,9 +82,30 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_file
# Find minimum distance
min_index = np.argmin(distances)
+ headwater_point_name = point[headwater_id]
+
+ # Find stream segment closest to headwater point
+ if mainstem_flag==True:
+
+ if point.is_colocated==True:
+
+ closest_stream = streams_subset.iloc[min_index]
+ distance_to_line = point.geometry.distance(Point(closest_stream.geometry.coords[-1]))
+
+ print(f"{point.nws_lid} distance on line {closest_stream.NHDPlusID}: {np.round(distance_to_line,1)}")
+ if not closest_stream.NHDPlusID in distance_from_upstream.keys():
+
+ distance_from_upstream[closest_stream.NHDPlusID] = [point.nws_lid,distance_to_line]
+
+ elif distance_from_upstream[closest_stream.NHDPlusID][1] > distance_to_line:
+
+ distance_from_upstream[closest_stream.NHDPlusID] = [point.nws_lid,distance_to_line]
+
+ headwater_point_name = distance_from_upstream[closest_stream.NHDPlusID][0]
+
# Closest segment to headwater
streams_subset.loc[min_index,headwater_col] = True
- streams_subset.loc[min_index,id_col] = point[headwater_id]
+ streams_subset.loc[min_index,id_col] = headwater_point_name
headwater_streams = headwater_streams.append(streams_subset[['NHDPlusID',headwater_col,id_col,'HUC8']])
diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh
index 6e9cfca7b..cab360898 100755
--- a/src/run_by_unit.sh
+++ b/src/run_by_unit.sh
@@ -132,9 +132,8 @@ python3 -m memory_profiler $srcDir/burn_in_levees.py -dem $outputHucDataDir/dem_
Tcount
## DEM Reconditioning ##
-# Using AGREE methodology, hydroenforce the DEM so that it is consistent
-# with the supplied stream network. This allows for more realistic catchment
-# delineation which is ultimately reflected in the output FIM mapping.
+# Using AGREE methodology, hydroenforce the DEM so that it is consistent with the supplied stream network.
+# This allows for more realistic catchment delineation which is ultimately reflected in the output FIM mapping.
echo -e $startDiv"Creating AGREE DEM using $agree_DEM_buffer meter buffer"$stopDiv
date -u
Tstart
@@ -191,7 +190,7 @@ Tcount
echo -e $startDiv"Performing lateral thalweg adjustment $hucNumber"$stopDiv
date -u
Tstart
-python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif
+python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif -th $thalweg_lateral_elev_threshold
Tcount
## MASK BURNED DEM FOR STREAMS ONLY ###
@@ -277,7 +276,7 @@ echo -e $startDiv"Vectorize Pixel Centroids $hucNumber"$stopDiv
date -u
Tstart
[ ! -f $outputHucDataDir/flows_points_pixels.gpkg ] && \
-$srcDir/reachID_grid_to_vector_points.py $demDerived_streamPixels $outputHucDataDir/flows_points_pixels.gpkg featureID
+$srcDir/reachID_grid_to_vector_points.py -r $demDerived_streamPixels -i featureID -p $outputHucDataDir/flows_points_pixels.gpkg
Tcount
## GAGE WATERSHED FOR PIXELS ##
diff --git a/src/split_flows.py b/src/split_flows.py
index 67b69f7e9..9d51065dd 100755
--- a/src/split_flows.py
+++ b/src/split_flows.py
@@ -181,27 +181,19 @@
else:
print ('Error: Could not add network attributes to stream segments')
-# Get Outlet Point Only
-#outlet = OrderedDict()
-#for i,segment in split_flows_gdf.iterrows():
-# outlet[segment.geometry.coords[-1]] = segment[hydro_id]
-
-#hydroIDs_points = [hidp for hidp in outlet.values()]
-#split_points = [Point(*point) for point in outlet]
-
# Get all vertices
split_points = OrderedDict()
-for row in split_flows_gdf[['geometry',hydro_id, 'NextDownID']].iterrows():
- lineString = row[1][0]
+for index, segment in split_flows_gdf.iterrows():
+ lineString = segment.geometry
for point in zip(*lineString.coords.xy):
if point in split_points:
- if row[1][2] == split_points[point]:
+ if segment.NextDownID == split_points[point]:
pass
else:
- split_points[point] = row[1][1]
+ split_points[point] = segment[hydro_id]
else:
- split_points[point] = row[1][1]
+ split_points[point] = segment[hydro_id]
hydroIDs_points = [hidp for hidp in split_points.values()]
split_points = [Point(*point) for point in split_points]
diff --git a/tools/thalweg_drop_check.py b/tools/thalweg_drop_check.py
new file mode 100644
index 000000000..c56d743e5
--- /dev/null
+++ b/tools/thalweg_drop_check.py
@@ -0,0 +1,385 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import geopandas as gpd
+sys.path.append('/foss_fim/src')
+from utils.shared_variables import PREP_PROJECTION
+from shapely.geometry import Point, LineString
+import rasterio
+import pandas as pd
+import numpy as np
+import argparse
+import matplotlib.pyplot as plt
+import seaborn as sns
+from collections import deque
+from os.path import join
+from multiprocessing import Pool
+from utils.shared_functions import getDriver
+from rasterio import features
+from reachID_grid_to_vector_points import convert_grid_cells_to_points
+import warnings
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+"""
+    Check for significant drops in thalweg elevation along each stream path.
+
+    Parameters
+    ----------
+    fim_dir : str
+        Directory containing FIM output folders.
+    output_dir : str
+        Directory to write profile plots and spatial layers.
+    stream_type : str
+        Stream layer to be evaluated ('derived' or 'burnline').
+ point_density : str
+ Elevation sampling density.
+ number_of_jobs : str
+ Number of jobs.
+"""
+
+
+def compare_thalweg(args):
+
+ huc_dir = args[0]
+ stream_type = args[1]
+ point_density = args[2]
+ huc = args[3]
+ dem_meters_filename = args[4]
+ dem_lateral_thalweg_adj_filename = args[5]
+ dem_thalwegCond_filename = args[6]
+ profile_plots_filename = args[7]
+ profile_gpkg_filename = args[8]
+ profile_table_filename = args[9]
+ flows_grid_boolean_filename = args[10]
+
+ if stream_type == 'derived':
+
+ dem_derived_reaches_filename = os.path.join(huc_dir,'demDerived_reaches_split.gpkg')
+ streams = gpd.read_file(dem_derived_reaches_filename)
+ nhd_headwater_filename = os.path.join(huc_dir,'nhd_headwater_points_subset.gpkg')
+ wbd_filename = os.path.join(huc_dir,'wbd.gpkg')
+ wbd = gpd.read_file(wbd_filename)
+ headwaters_layer = gpd.read_file(nhd_headwater_filename,mask=wbd)
+ headwater_list = headwaters_layer.loc[headwaters_layer.pt_type == 'nws_lid']
+ stream_id = 'HydroID'
+
+ elif stream_type == 'burnline':
+
+ nhd_reaches_filename = os.path.join(huc_dir,'NHDPlusBurnLineEvent_subset.gpkg')
+ nhd_reaches = gpd.read_file(nhd_reaches_filename)
+ streams = nhd_reaches.copy()
+ headwaters_layer = None
+
+ # Get lists of all complete reaches using headwater attributes
+ headwater_list = streams.loc[streams.nws_lid!=''].nws_lid
+ stream_id = 'NHDPlusID'
+
+ headwater_col = 'is_headwater'
+ streams[headwater_col] = False
+ headwater_list = headwater_list.reset_index(drop=True)
+
+ if stream_type == 'derived':
+ streams['nws_lid'] = ''
+
+ if streams.NextDownID.dtype != 'int': streams.NextDownID = streams.NextDownID.astype(int)
+
+ min_dist = np.empty(len(headwater_list))
+ streams['min_dist'] = 1000
+
+ for i, point in headwater_list.iterrows():
+ streams['min_dist'] = [point.geometry.distance(line) for line in streams.geometry]
+ streams.loc[streams.min_dist==np.min(streams.min_dist),'nws_lid'] = point.site_id
+
+ headwater_list = headwater_list.site_id
+
+ streams.set_index(stream_id,inplace=True,drop=False)
+
+ # Collect headwater streams
+ single_stream_paths = []
+ dem_meters = rasterio.open(dem_meters_filename,'r')
+ index_option = 'reachID'
+ for index, headwater_site in enumerate(headwater_list):
+ stream_path = get_downstream_segments(streams.copy(),'nws_lid', headwater_site,'downstream',stream_id,stream_type)
+ stream_path = stream_path.reset_index(drop=True)
+ stream_path = stream_path.sort_values(by=['downstream_count'])
+ stream_path = stream_path.loc[stream_path.downstream==True]
+ if stream_type == 'burnline':
+ geom_value = []
+ for index, segment in stream_path.iterrows():
+ lineString = LineString(segment.geometry.coords[::-1])
+ geom_value = geom_value + [(lineString, segment.downstream_count)]
+ nhd_reaches_raster = features.rasterize(shapes=geom_value , out_shape=[dem_meters.height, dem_meters.width],fill=dem_meters.nodata,transform=dem_meters.transform, all_touched=True, dtype=np.float32)
+ flow_bool = rasterio.open(flows_grid_boolean_filename)
+ flow_bool_data = flow_bool.read(1)
+ nhd_reaches_raster = np.where(flow_bool_data == int(0), -9999.0, (nhd_reaches_raster).astype(rasterio.float32))
+ out_dem_filename = os.path.join(huc_dir,'NHDPlusBurnLineEvent_raster.tif')
+ with rasterio.open(out_dem_filename, "w", **dem_meters.profile, BIGTIFF='YES') as dest:
+ dest.write(nhd_reaches_raster, indexes = 1)
+ stream_path = convert_grid_cells_to_points(out_dem_filename,index_option)
+ stream_path["headwater_path"] = headwater_site
+ single_stream_paths = single_stream_paths + [stream_path]
+ print(f"length of {headwater_site} path: {len(stream_path)}")
+
+ # Collect elevation values from multiple grids along each individual reach point
+ dem_lateral_thalweg_adj = rasterio.open(dem_lateral_thalweg_adj_filename,'r')
+ dem_thalwegCond = rasterio.open(dem_thalwegCond_filename,'r')
+ thalweg_points = gpd.GeoDataFrame()
+ for path in single_stream_paths:
+ split_points = []
+ stream_ids = []
+ dem_m_elev = []
+ dem_burned_filled_elev = []
+ dem_lat_thal_adj_elev = []
+ dem_thal_adj_elev = []
+ headwater_path = []
+ index_count = []
+ for index, segment in path.iterrows():
+ if stream_type == 'derived':
+ linestring = segment.geometry
+ if point_density == 'midpoints':
+ midpoint = linestring.interpolate(0.5,normalized=True)
+ stream_ids = stream_ids + [segment[stream_id]]
+ split_points = split_points + [midpoint]
+ index_count = index_count + [segment.downstream_count]
+ dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(midpoint).coords), indexes=1))).item()]
+ dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(midpoint).coords), indexes=1))).item()]
+ dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(midpoint).coords), indexes=1))).item()]
+ headwater_path = headwater_path + [segment.headwater_path]
+ elif point_density == 'all_points':
+ count=0
+ for point in zip(*linestring.coords.xy):
+ stream_ids = stream_ids + [segment[stream_id]]
+ split_points = split_points + [Point(point)]
+ count = count + 1
+ index_count = index_count + [segment.downstream_count*1000 + count]
+ dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(point).coords), indexes=1))).item()]
+ dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(point).coords), indexes=1))).item()]
+ dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(point).coords), indexes=1))).item()]
+ headwater_path = headwater_path + [segment.headwater_path]
+ elif stream_type == 'burnline':
+ stream_ids = stream_ids + [segment['id']]
+ split_points = split_points + [Point(segment.geometry)]
+ index_count = index_count + [segment['id']]
+ dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(segment.geometry).coords), indexes=1))).item()]
+ dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(segment.geometry).coords), indexes=1))).item()]
+ dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(segment.geometry).coords), indexes=1))).item()]
+ headwater_path = headwater_path + [segment.headwater_path]
+ # gpd.GeoDataFrame({**data, "source": "dem_m"})
+ dem_m_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'dem_m', 'elevation_m': dem_m_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry')
+ dem_lat_thal_adj_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'dem_lat_thal_adj', 'elevation_m': dem_lat_thal_adj_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry')
+ dem_thal_adj_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'thal_adj_dem', 'elevation_m': dem_thal_adj_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry')
+ for raster in [dem_m_pts,dem_lat_thal_adj_pts,dem_thal_adj_pts]:
+ raster = raster.sort_values(by=['index_count'])
+ raster.set_index('index_count',inplace=True,drop=True)
+ raster = raster.reset_index(drop=True)
+ raster.index.names = ['index_count']
+ raster = raster.reset_index(drop=False)
+ thalweg_points = thalweg_points.append(raster,ignore_index = True)
+ del raster
+ del dem_m_pts,dem_lat_thal_adj_pts,dem_thal_adj_pts
+
+ del dem_lateral_thalweg_adj,dem_thalwegCond,dem_meters
+
+ try:
+ # Remove nodata_pts and convert elevation to ft
+ thalweg_points = thalweg_points.loc[thalweg_points.elevation_m > 0.0]
+ thalweg_points.elevation_m = np.round(thalweg_points.elevation_m,3)
+ thalweg_points['elevation_ft'] = np.round(thalweg_points.elevation_m*3.28084,3)
+
+ # Plot thalweg profile
+ plot_profile(thalweg_points, profile_plots_filename)
+
+    # Filter final thalweg adjusted layer
+ thal_adj_points = thalweg_points.loc[thalweg_points.source=='thal_adj_dem'].copy()
+ # thal_adj_points.to_file(profile_gpkg_filename,driver=getDriver(profile_gpkg_filename))
+
+ # Identify significant rises/drops in elevation
+ thal_adj_points['elev_change'] = thal_adj_points.groupby(['headwater_path', 'source'])['elevation_m'].apply(lambda x: x - x.shift())
+ elev_changes = thal_adj_points.loc[(thal_adj_points.elev_change<=-lateral_elevation_threshold) | (thal_adj_points.elev_change>0.0)]
+
+ if not elev_changes.empty:
+ # elev_changes.to_csv(profile_table_filename,index=False)
+ elev_changes.to_file(profile_gpkg_filename,index=False,driver=getDriver(profile_gpkg_filename))
+
+
+ # Zoom in to plot only areas with steep elevation changes
+ # select_streams = elev_changes.stream_id.to_list()
+ # downstream_segments = [index + 1 for index in select_streams]
+ # upstream_segments = [index - 1 for index in select_streams]
+ # select_streams = list(set(upstream_segments + downstream_segments + select_streams))
+ # thal_adj_points_select = thal_adj_points.loc[thal_adj_points.stream_id.isin(select_streams)]
+ # plot_profile(thal_adj_points_select, profile_plots_filename_zoom)
+
+ except:
+ print(f"huc {huc} has {len(thalweg_points)} thalweg points")
+
+def get_downstream_segments(streams, headwater_col,headwater_id,flag_column,stream_id,stream_type):
+
+ streams[flag_column] = False
+ streams['downstream_count'] = -9
+ streams.loc[streams[headwater_col]==headwater_id,flag_column] = True
+ streams.loc[streams[headwater_col]==headwater_id,'downstream_count'] = 0
+ count = 0
+
+ Q = deque(streams.loc[streams[headwater_col]==headwater_id,stream_id].tolist())
+ visited = set()
+
+ while Q:
+ q = Q.popleft()
+
+ if q in visited:
+ continue
+
+ visited.add(q)
+
+ count = count + 1
+ if stream_type == 'burnline':
+
+ toNode,DnLevelPat = streams.loc[q,['ToNode','DnLevelPat']]
+ downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist()
+
+ # If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. exclude segments that are diversions)
+ if len(set(downstream_ids)) > 1: # special case: remove duplicate NHDPlusIDs
+
+ relevant_ids = [segment for segment in downstream_ids if DnLevelPat == streams.loc[segment,'LevelPathI']]
+
+ else:
+
+ relevant_ids = downstream_ids
+
+ elif stream_type == 'derived':
+
+ toNode = streams.loc[q,['NextDownID']].item()
+ relevant_ids = streams.loc[streams[stream_id] == toNode,:].index.tolist()
+
+ streams.loc[relevant_ids,flag_column] = True
+ streams.loc[relevant_ids,'downstream_count'] = count
+
+ for i in relevant_ids:
+
+ if i not in visited:
+ Q.append(i)
+
+ streams = streams.loc[streams[flag_column],:]
+
+ return streams
+
+
+def plot_profile(elevation_table,profile_plots_filename):
+ num_plots = len(elevation_table.headwater_path.unique())
+ unique_rasters = elevation_table.source.unique()
+ if num_plots > 3:
+ columns = int(np.ceil(num_plots / 3))
+ else:
+ columns = 1
+ # palette = dict(zip(unique_rasters, sns.color_palette(n_colors=len(unique_rasters))))
+ # palette.update({'dem_m':'gray'})
+ sns.set(style="ticks")
+ if len(unique_rasters) > 1:
+ g = sns.FacetGrid(elevation_table, col="headwater_path", hue="source", hue_order=['dem_m', 'dem_lat_thal_adj', 'thal_adj_dem'], sharex=False, sharey=False,col_wrap=columns)
+ else:
+ g = sns.FacetGrid(elevation_table, col="headwater_path", hue="source", sharex=False, sharey=False,col_wrap=columns)
+ g.map(sns.lineplot, "index_count", "elevation_ft", palette="tab20c")
+ g.set_axis_labels(x_var="Longitudinal Profile (index)", y_var="Elevation (ft)")
+    # Iterate through each axis to get individual y-axis bounds
+ for ax in g.axes.flat:
+ mins = []
+ maxes = []
+ for line in ax.lines:
+ mins = mins + [min(line.get_ydata())]
+ maxes = maxes + [max(line.get_ydata())]
+ min_y = min(mins) - (max(maxes) - min(mins))/10
+ max_y = max(maxes) + (max(maxes) - min(mins))/10
+ ax.set_ylim(min_y,max_y)
+ # if len(unique_rasters) > 1:
+ # ax.lines[0].set_linestyle("--")
+ # ax.lines[0].set_color('gray')
+ # box = ax.get_position()
+ # ax.set_position([box.x0, box.y0 + box.height * 0.1,box.width, box.height * 0.9])
+ # Adjust the arrangement of the plots
+ # g.fig.tight_layout(w_pad=5) #w_pad=2
+ g.add_legend()
+ # plt.legend(bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0)
+ plt.subplots_adjust(bottom=0.25)
+ plt.savefig(profile_plots_filename)
+ plt.close()
+
+
+if __name__ == '__main__':
+
+ parser = argparse.ArgumentParser(description='generate rating curve plots and tables for FIM and USGS gages')
+ parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str)
+ parser.add_argument('-output_dir','--output-dir', help='rating curves output folder', required=True,type=str)
+ # parser.add_argument('-rasters','--raster-list',help='list of rasters to be evaluated',required=True,type=str)
+ parser.add_argument('-stream_type','--stream-type',help='stream layer to be evaluated',required=True,type=str,choices=['derived','burnline'])
+ parser.add_argument('-point_density','--point-density',help='elevation sampling density',required=True,type=str,choices=['midpoints','all_points'])
+ parser.add_argument('-th','--elevation_threshold',help='significant elevation drop threshold in meters.',required=True)
+ parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int)
+
+ args = vars(parser.parse_args())
+
+ fim_dir = args['fim_dir']
+ output_dir = args['output_dir']
+ # raster_list = args['raster_list']
+ stream_type = args['stream_type']
+ point_density = args['point_density']
+ number_of_jobs = args['number_of_jobs']
+
+ # dem_meters_dir = os.environ.get('dem_meters')
+
+ plots_dir = join(output_dir,'plots')
+ os.makedirs(plots_dir, exist_ok=True)
+ spatial_dir = os.path.join(output_dir,'spatial_layers')
+ os.makedirs(spatial_dir, exist_ok=True)
+
+ # Open log file
+ sys.__stdout__ = sys.stdout
+ log_file = open(join(output_dir,'thalweg_profile_comparison.log'),"w")
+ sys.stdout = log_file
+
+ procs_list = []
+ huc_list = os.listdir(fim_dir)
+ for huc in huc_list:
+ if huc != 'logs':
+
+ huc_dir = os.path.join(fim_dir,huc)
+ flows_grid_boolean_filename = os.path.join(huc_dir,'flows_grid_boolean.tif')
+ dem_meters_filename = os.path.join(huc_dir,'dem_meters.tif')
+ dem_lateral_thalweg_adj_filename = os.path.join(huc_dir,'dem_lateral_thalweg_adj.tif')
+ dem_thalwegCond_filename = os.path.join(huc_dir,'dem_thalwegCond.tif')
+ profile_plots_filename = os.path.join(plots_dir,f"profile_drop_plots_{huc}_{point_density}_{stream_type}.png")
+ profile_gpkg_filename = os.path.join(spatial_dir,f"thalweg_elevation_changes_{huc}_{point_density}_{stream_type}.gpkg")
+ profile_table_filename = os.path.join(spatial_dir,f"thalweg_elevation_changes_{huc}_{point_density}_{stream_type}.csv")
+
+ procs_list.append([huc_dir,stream_type,point_density,huc,dem_meters_filename,dem_lateral_thalweg_adj_filename,dem_thalwegCond_filename,profile_plots_filename,profile_gpkg_filename,profile_table_filename,flows_grid_boolean_filename])
+
+ # Initiate multiprocessing
+ print(f"Generating thalweg elevation profiles for {len(procs_list)} hucs using {number_of_jobs} jobs")
+ with Pool(processes=number_of_jobs) as pool:
+ # Get elevation values along thalweg for each headwater stream path
+ pool.map(compare_thalweg, procs_list)
+
+ # Append all elevation change spatial layers to a single gpkg
+ spatial_list = os.listdir(spatial_dir)
+ agg_thalweg_elevations_gpkg_fileName = os.path.join(output_dir, f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.gpkg")
+ agg_thalweg_elevation_table_fileName = os.path.join(output_dir, f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.csv")
+ for layer in spatial_list:
+
+ huc_gpd = gpd.read_file(os.path.join(spatial_dir,layer))
+
+ # Write aggregate layer
+ if os.path.isfile(agg_thalweg_elevations_gpkg_fileName):
+ huc_gpd.to_file(agg_thalweg_elevations_gpkg_fileName,driver=getDriver(agg_thalweg_elevations_gpkg_fileName),index=False, mode='a')
+ else:
+ huc_gpd.to_file(agg_thalweg_elevations_gpkg_fileName,driver=getDriver(agg_thalweg_elevations_gpkg_fileName),index=False)
+
+ del huc_gpd
+
+ # Create csv of elevation table
+ huc_table = gpd.read_file(agg_thalweg_elevations_gpkg_fileName)
+ huc_table.to_csv(agg_thalweg_elevation_table_fileName,index=False)
+
+ # Close log file
+ sys.stdout = sys.__stdout__
+ log_file.close()