diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 8b1378917..000000000 --- a/.dockerignore +++ /dev/null @@ -1 +0,0 @@ - diff --git a/.gitignore b/.gitignore index 370f4f97d..e18bb6432 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,7 @@ config/** !config/*default* !config/*calibrated* !config/symbology/ +.vscode/ +**/.DS_Store +**/*_pytest.py + diff --git a/.isort.cfg b/.isort.cfg new file mode 100755 index 000000000..aa7e7666d --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,3 @@ +[tool.isort] +profile = "black" +multi_line_output = 3 diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index 867435f65..000000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,7 +0,0 @@ -All notable changes to this project will be documented in this file. -We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. - - -## v3.0.0.0 - 2020-12-22 - -The software released here builds on the flood inundation mapping capabilities demonstrated as part of the National Flood Interoperability Experiment, the Office of Water Prediction's Innovators Program and the National Water Center Summer Institute. The flood inundation mapping software implements the Height Above Nearest Drainage (HAND) algorithm and incorporates community feedback and lessons learned over several years. The software has been designed to meet the requirements set by stakeholders interested in flood prediction and has been developed in partnership with several entities across the water enterprise. diff --git a/Dockerfile.dev b/Dockerfile similarity index 82% rename from Dockerfile.dev rename to Dockerfile index a57389da8..ef56576b8 100644 --- a/Dockerfile.dev +++ b/Dockerfile @@ -4,19 +4,18 @@ WORKDIR /opt/builder ARG dataDir=/data ARG projectDir=/foss_fim ARG depDir=/dependencies -ARG taudemVersion=bf9417172225a9ce2462f11138c72c569c253a1a +ARG taudemVersion=98137bb6541a0d0077a9c95becfed4e56d0aa0ac ARG taudemVersion2=81f7a07cdd3721617a30ee4e087804fddbcffa88 -ENV DEBIAN_FRONTEND noninteractive ENV taudemDir=$depDir/taudem/bin ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin -RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/* RUN git clone https://github.com/dtarb/taudem.git RUN git clone https://github.com/fernandoa123/cybergis-toolkit.git taudem_accelerated_flowDirections -RUN apt-get update && apt-get install -y cmake mpich \ - libgtest-dev libboost-test-dev libnetcdf-dev && rm -rf /var/lib/apt/lists/* +RUN apt-get update --fix-missing && apt-get install -y cmake mpich \ + libgtest-dev libboost-test-dev libnetcdf-dev && rm -rf /var/lib/apt/lists/* ## Compile Main taudem repo ## RUN mkdir -p taudem/bin @@ -53,8 +52,8 @@ ARG dataDir=/data ARG projectDir=/foss_fim ARG depDir=/dependencies ENV inputDataDir=$dataDir/inputs -ENV outputDataDir=$dataDir/outputs -ENV libDir=$projectDir/lib +ENV outputDataDir=$dataDir/outputs +ENV srcDir=$projectDir/src ENV taudemDir=$depDir/taudem/bin ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin @@ -71,11 +70,9 @@ RUN mkdir -p $depDir COPY --from=builder $depDir $depDir -RUN apt update --fix-missing -RUN apt install -y p7zip-full python3-pip time mpich=3.3.2-2build1 parallel=20161222-1.1 libgeos-dev=3.8.0-1build1 expect=5.45.4-2build1 +RUN apt update --fix-missing && apt install -y p7zip-full python3-pip time mpich=3.3.2-2build1 parallel=20161222-1.1 libgeos-dev=3.8.0-1build1 expect=5.45.4-2build1 -COPY install_grass.exp . 
-RUN ./install_grass.exp +RUN DEBIAN_FRONTEND=noninteractive apt install -y grass=7.8.2-1build3 grass-doc=7.8.2-1build3 RUN apt auto-remove @@ -93,5 +90,5 @@ COPY Pipfile.lock . RUN pip3 install pipenv && PIP_NO_CACHE_DIR=off PIP_NO_BINARY=shapely,pygeos pipenv install --system --deploy --ignore-pipfile ## RUN UMASK TO CHANGE DEFAULT PERMISSIONS ## -ADD ./lib/entrypoint.sh / +ADD ./src/entrypoint.sh / ENTRYPOINT ["/bin/bash", "/entrypoint.sh"] diff --git a/Dockerfile.prod b/Dockerfile.prod deleted file mode 100644 index 86fd4ec31..000000000 --- a/Dockerfile.prod +++ /dev/null @@ -1,99 +0,0 @@ -## Temporary image to build the libraries and only save the needed artifacts -FROM osgeo/gdal:ubuntu-full-3.1.2 AS builder -WORKDIR /opt/builder -ARG dataDir=/data -ARG projectDir=/foss_fim -ARG depDir=/dependencies -ARG taudemVersion=bf9417172225a9ce2462f11138c72c569c253a1a -ARG taudemVersion2=81f7a07cdd3721617a30ee4e087804fddbcffa88 -ENV DEBIAN_FRONTEND noninteractive -ENV taudemDir=$depDir/taudem/bin -ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin - -RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/* - -RUN git clone https://github.com/dtarb/taudem.git -RUN git clone https://github.com/fernandoa123/cybergis-toolkit.git taudem_accelerated_flowDirections - -RUN apt-get update && apt-get install -y cmake mpich \ - libgtest-dev libboost-test-dev libnetcdf-dev && rm -rf /var/lib/apt/lists/* - -## Compile Main taudem repo ## -RUN mkdir -p taudem/bin -RUN cd taudem \ - && git checkout $taudemVersion \ - && cd src \ - && make - -## Compile taudem repo with accelerated flow directions ## -RUN cd taudem_accelerated_flowDirections/taudem \ - && git checkout $taudemVersion2 \ - && mkdir build \ - && cd build \ - && cmake .. \ - && make - -RUN mkdir -p $taudemDir -RUN mkdir -p $taudemDir2 - -## Move needed binaries to the next stage of the image -RUN cd taudem/bin && mv -t $taudemDir flowdircond aread8 threshold streamnet gagewatershed catchhydrogeo dinfdistdown -RUN cd taudem_accelerated_flowDirections/taudem/build/bin && mv -t $taudemDir2 d8flowdir dinfflowdir - - - - -############################################################################################### - - - -# Base Image that has GDAL, PROJ, etc -FROM osgeo/gdal:ubuntu-full-3.1.2 -ARG dataDir=/data -ARG projectDir=/foss_fim -ARG depDir=/dependencies -ENV inputDataDir=$dataDir/inputs -ENV outputDataDir=$dataDir/outputs -ENV libDir=$projectDir/lib -ENV taudemDir=$depDir/taudem/bin -ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin - -ARG GroupID=1370800235 -ARG GroupName=fim -RUN addgroup --gid $GroupID $GroupName -ENV GID=$GroupID -ENV GN=$GroupName - -RUN mkdir -p $depDir -COPY --from=builder $depDir $depDir - -RUN apt update --fix-missing -RUN apt install -y p7zip-full python3-pip time mpich=3.3.2-2build1 parallel=20161222-1.1 libgeos-dev=3.8.0-1build1 expect=5.45.4-2build1 - -COPY install_grass.exp . -RUN ./install_grass.exp - -RUN apt auto-remove - -## adding environment variables for numba and python ## -ENV LC_ALL=C.UTF-8 -ENV LANG=C.UTF-8 -ENV PYTHONUNBUFFERED=TRUE - -## ADD TO PATHS ## -ENV PATH="$projectDir:${PATH}" - -## install python 3 modules ## -COPY requirements.txt . -RUN pip3 install --no-binary shapely --no-binary pygeos -r requirements.txt --no-cache-dir - -## Copy the source code to the image -COPY . 
$projectDir/ - -## Set user:group for running docker in detached mode -USER root:$GroupName - -# RUN UMASK TO CHANGE DEFAULT PERMISSIONS ## -ADD ./lib/entrypoint.sh / -ENTRYPOINT ["/bin/bash", "/entrypoint.sh"] - diff --git a/Pipfile b/Pipfile old mode 100644 new mode 100755 index c8fbfc0f6..b54b7daf7 --- a/Pipfile +++ b/Pipfile @@ -7,6 +7,7 @@ verify_ssl = true ipython = "*" [packages] +fiona = "==1.8.17" geopandas = "==0.8.1" numba = "==0.50.1" pandas = "==1.0.5" @@ -18,6 +19,10 @@ tqdm = "==4.48.0" Shapely = "==1.7.0" grass-session = "==0.5" seaborn = "==0.11.0" +python-dotenv = "*" +natsort = "*" +memory-profiler = "*" +pyproj = "==3.1.0" [requires] python_version = "3.8" diff --git a/Pipfile.lock b/Pipfile.lock index 5de3d9856..4307ca71f 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "5e7e798bab631b1e9012e79e84031f1b4852178692adc10bb8a071c9994c9d56" + "sha256": "f1296d064e5178d77fdf23130a571d8169821f2eeed04b37c1b2cf60fe6928ba" }, "pipfile-spec": 6, "requires": { @@ -25,18 +25,18 @@ }, "attrs": { "hashes": [ - "sha256:26b54ddbbb9ee1d34d5d3668dd37d6cf74990ab23c828c2888dccdceee395594", - "sha256:fce7fc47dfc976152e82d53ff92fa0407700c21acd20886a13777a0d20e655dc" + "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1", + "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==20.2.0" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==21.2.0" }, "certifi": { "hashes": [ - "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3", - "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41" + "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", + "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" ], - "version": "==2020.6.20" + "version": "==2021.10.8" }, "click": { "hashes": [ @@ -55,19 +55,19 @@ }, "cligj": { "hashes": [ - "sha256:2bf2042a81be581d707f726aef5efbbd935a62af85d5521305026dabeb798f5d", - "sha256:394a0905fe6f36821b82f086bf8cc12fef20d99d0a3c26a8a92a9207a18b70c6", - "sha256:9881e3b71ff450a83412fadee026347ca15b99c623b9485593da4929cf884ca9" + "sha256:a4bc13d623356b373c2c27c53dbd9c68cae5d526270bfa71f6c6fa69669c6b27", + "sha256:c1ca117dbce1fe20a5809dc96f01e1c2840f6dcc939b3ddbb1111bf330ba82df" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' and python_version < '4'", - "version": "==0.7.0" + "version": "==0.7.2" }, "cycler": { "hashes": [ - "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d", - "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8" + "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3", + "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f" ], - "version": "==0.10.0" + "markers": "python_version >= '3.6'", + "version": "==0.11.0" }, "fiona": { "hashes": [ @@ -82,8 +82,17 @@ "sha256:d38a6ef59087b5a20ad7298608c5392e37705ff14d27b44435e04072bbf6632c", "sha256:fcfd8b67403de9b1cc53c045c72542e9f30cb15e617c89c41b928046a9b27daa" ], + "index": "pypi", "version": "==1.8.17" }, + "fonttools": { + "hashes": [ + "sha256:68071406009e7ef6a5fdcd85d95975cd6963867bb226f2b786bfffe15d1959ef", + "sha256:8c8f84131bf04f3b1dcf99b9763cec35c347164ab6ad006e18d2f99fcab05529" + ], + "markers": "python_version >= '3.7'", + "version": "==4.28.1" + }, "geopandas": 
{ "hashes": [ "sha256:e28a729e44ac53c1891b54b1aca60e3bc0bb9e88ad0f2be8e301a03b9510f6e2", @@ -102,28 +111,53 @@ }, "kiwisolver": { "hashes": [ - "sha256:03662cbd3e6729f341a97dd2690b271e51a67a68322affab12a5b011344b973c", - "sha256:18d749f3e56c0480dccd1714230da0f328e6e4accf188dd4e6884bdd06bf02dd", - "sha256:247800260cd38160c362d211dcaf4ed0f7816afb5efe56544748b21d6ad6d17f", - "sha256:38d05c9ecb24eee1246391820ed7137ac42a50209c203c908154782fced90e44", - "sha256:443c2320520eda0a5b930b2725b26f6175ca4453c61f739fef7a5847bd262f74", - "sha256:4eadb361baf3069f278b055e3bb53fa189cea2fd02cb2c353b7a99ebb4477ef1", - "sha256:556da0a5f60f6486ec4969abbc1dd83cf9b5c2deadc8288508e55c0f5f87d29c", - "sha256:603162139684ee56bcd57acc74035fceed7dd8d732f38c0959c8bd157f913fec", - "sha256:60a78858580761fe611d22127868f3dc9f98871e6fdf0a15cc4203ed9ba6179b", - "sha256:63f55f490b958b6299e4e5bdac66ac988c3d11b7fafa522800359075d4fa56d1", - "sha256:7cc095a4661bdd8a5742aaf7c10ea9fac142d76ff1770a0f84394038126d8fc7", - "sha256:be046da49fbc3aa9491cc7296db7e8d27bcf0c3d5d1a40259c10471b014e4e0c", - "sha256:c31bc3c8e903d60a1ea31a754c72559398d91b5929fcb329b1c3a3d3f6e72113", - "sha256:c955791d80e464da3b471ab41eb65cf5a40c15ce9b001fdc5bbc241170de58ec", - "sha256:d069ef4b20b1e6b19f790d00097a5d5d2c50871b66d10075dab78938dc2ee2cf", - "sha256:d52b989dc23cdaa92582ceb4af8d5bcc94d74b2c3e64cd6785558ec6a879793e", - "sha256:e586b28354d7b6584d8973656a7954b1c69c93f708c0c07b77884f91640b7657", - "sha256:efcf3397ae1e3c3a4a0a0636542bcad5adad3b1dd3e8e629d0b6e201347176c8", - "sha256:fccefc0d36a38c57b7bd233a9b485e2f1eb71903ca7ad7adacad6c28a56d62d2" + "sha256:0007840186bacfaa0aba4466d5890334ea5938e0bb7e28078a0eb0e63b5b59d5", + "sha256:19554bd8d54cf41139f376753af1a644b63c9ca93f8f72009d50a2080f870f77", + "sha256:1d45d1c74f88b9f41062716c727f78f2a59a5476ecbe74956fafb423c5c87a76", + "sha256:1d819553730d3c2724582124aee8a03c846ec4362ded1034c16fb3ef309264e6", + "sha256:2210f28778c7d2ee13f3c2a20a3a22db889e75f4ec13a21072eabb5693801e84", + "sha256:22521219ca739654a296eea6d4367703558fba16f98688bd8ce65abff36eaa84", + "sha256:25405f88a37c5f5bcba01c6e350086d65e7465fd1caaf986333d2a045045a223", + "sha256:2b65bd35f3e06a47b5c30ea99e0c2b88f72c6476eedaf8cfbc8e66adb5479dcf", + "sha256:2ddb500a2808c100e72c075cbb00bf32e62763c82b6a882d403f01a119e3f402", + "sha256:2f8f6c8f4f1cff93ca5058d6ec5f0efda922ecb3f4c5fb76181f327decff98b8", + "sha256:30fa008c172355c7768159983a7270cb23838c4d7db73d6c0f6b60dde0d432c6", + "sha256:3dbb3cea20b4af4f49f84cffaf45dd5f88e8594d18568e0225e6ad9dec0e7967", + "sha256:4116ba9a58109ed5e4cb315bdcbff9838f3159d099ba5259c7c7fb77f8537492", + "sha256:44e6adf67577dbdfa2d9f06db9fbc5639afefdb5bf2b4dfec25c3a7fbc619536", + "sha256:5326ddfacbe51abf9469fe668944bc2e399181a2158cb5d45e1d40856b2a0589", + "sha256:70adc3658138bc77a36ce769f5f183169bc0a2906a4f61f09673f7181255ac9b", + "sha256:72be6ebb4e92520b9726d7146bc9c9b277513a57a38efcf66db0620aec0097e0", + "sha256:7843b1624d6ccca403a610d1277f7c28ad184c5aa88a1750c1a999754e65b439", + "sha256:7ba5a1041480c6e0a8b11a9544d53562abc2d19220bfa14133e0cdd9967e97af", + "sha256:80efd202108c3a4150e042b269f7c78643420cc232a0a771743bb96b742f838f", + "sha256:82f49c5a79d3839bc8f38cb5f4bfc87e15f04cbafa5fbd12fb32c941cb529cfb", + "sha256:83d2c9db5dfc537d0171e32de160461230eb14663299b7e6d18ca6dca21e4977", + "sha256:8d93a1095f83e908fc253f2fb569c2711414c0bfd451cab580466465b235b470", + "sha256:8dc3d842fa41a33fe83d9f5c66c0cc1f28756530cd89944b63b072281e852031", + "sha256:9661a04ca3c950a8ac8c47f53cbc0b530bce1b52f516a1e87b7736fec24bfff0", + 
"sha256:a498bcd005e8a3fedd0022bb30ee0ad92728154a8798b703f394484452550507", + "sha256:a7a4cf5bbdc861987a7745aed7a536c6405256853c94abc9f3287c3fa401b174", + "sha256:b5074fb09429f2b7bc82b6fb4be8645dcbac14e592128beeff5461dcde0af09f", + "sha256:b6a5431940f28b6de123de42f0eb47b84a073ee3c3345dc109ad550a3307dd28", + "sha256:ba677bcaff9429fd1bf01648ad0901cea56c0d068df383d5f5856d88221fe75b", + "sha256:bcadb05c3d4794eb9eee1dddf1c24215c92fb7b55a80beae7a60530a91060560", + "sha256:bf7eb45d14fc036514c09554bf983f2a72323254912ed0c3c8e697b62c4c158f", + "sha256:c358721aebd40c243894298f685a19eb0491a5c3e0b923b9f887ef1193ddf829", + "sha256:c4550a359c5157aaf8507e6820d98682872b9100ce7607f8aa070b4b8af6c298", + "sha256:c6572c2dab23c86a14e82c245473d45b4c515314f1f859e92608dcafbd2f19b8", + "sha256:cba430db673c29376135e695c6e2501c44c256a81495da849e85d1793ee975ad", + "sha256:dedc71c8eb9c5096037766390172c34fb86ef048b8e8958b4e484b9e505d66bc", + "sha256:e6f5eb2f53fac7d408a45fbcdeda7224b1cfff64919d0f95473420a931347ae9", + "sha256:ec2eba188c1906b05b9b49ae55aae4efd8150c61ba450e6721f64620c50b59eb", + "sha256:ee040a7de8d295dbd261ef2d6d3192f13e2b08ec4a954de34a6fb8ff6422e24c", + "sha256:eedd3b59190885d1ebdf6c5e0ca56828beb1949b4dfe6e5d0256a461429ac386", + "sha256:f441422bb313ab25de7b3dbfd388e790eceb76ce01a18199ec4944b369017009", + "sha256:f8eb7b6716f5b50e9c06207a14172cf2de201e41912ebe732846c02c830455b9", + "sha256:fc4453705b81d03568d5b808ad8f09c77c47534f6ac2e72e733f9ca4714aa75c" ], - "markers": "python_version >= '3.6'", - "version": "==1.2.0" + "markers": "python_version >= '3.7'", + "version": "==1.3.2" }, "llvmlite": { "hashes": [ @@ -149,27 +183,51 @@ }, "matplotlib": { "hashes": [ - "sha256:06866c138d81a593b535d037b2727bec9b0818cadfe6a81f6ec5715b8dd38a89", - "sha256:16b241c3d17be786966495229714de37de04472da472277869b8d5b456a8df00", - "sha256:27f9de4784ae6fb97679556c5542cf36c0751dccb4d6407f7c62517fa2078868", - "sha256:2f5eefc17dc2a71318d5a3496313be5c351c0731e8c4c6182c9ac3782cfc4076", - "sha256:371518c769d84af8ec9b7dcb871ac44f7a67ef126dd3a15c88c25458e6b6d205", - "sha256:3d2edbf59367f03cd9daf42939ca06383a7d7803e3993eb5ff1bee8e8a3fbb6b", - "sha256:3fb0409754b26f48045bacd6818e44e38ca9338089f8ba689e2f9344ff2847c7", - "sha256:548cfe81476dbac44db96e9c0b074b6fb333b4d1f12b1ae68dbed47e45166384", - "sha256:57be9e21073fc367237b03ecac0d9e4b8ddbe38e86ec4a316857d8d93ac9286c", - "sha256:5ccecb5f78b51b885f0028b646786889f49c54883e554fca41a2a05998063f23", - "sha256:69cf76d673682140f46c6cb5e073332c1f1b2853c748dc1cb04f7d00023567f7", - "sha256:793e061054662aa27acaff9201cdd510a698541c6e8659eeceb31d66c16facc6", - "sha256:799c421bc245a0749c1515b6dea6dc02db0a8c1f42446a0f03b3b82a60a900dc", - "sha256:8bc1d3284dee001f41ec98f59675f4d723683e1cc082830b440b5f081d8e0ade", - "sha256:a522de31e07ed7d6f954cda3fbd5ca4b8edbfc592a821a7b00291be6f843292e", - "sha256:be2f0ec62e0939a9dcfd3638c140c5a74fc929ee3fd1f31408ab8633db6e1523", - "sha256:c5d0c2ae3e3ed4e9f46b7c03b40d443601012ffe8eb8dfbb2bd6b2d00509f797", - "sha256:f0268613073df055bcc6a490de733012f2cf4fe191c1adb74e41cec8add1a165" + "sha256:0abf8b51cc6d3ba34d1b15b26e329f23879848a0cf1216954c1f432ffc7e1af7", + "sha256:0e020a42f3338823a393dd2f80e39a2c07b9f941dfe2c778eb104eeb33d60bb5", + "sha256:13930a0c9bec0fd25f43c448b047a21af1353328b946f044a8fc3be077c6b1a8", + "sha256:153a0cf6a6ff4f406a0600d2034710c49988bacc6313d193b32716f98a697580", + "sha256:18f6e52386300db5cc4d1e9019ad9da2e80658bab018834d963ebb0aa5355095", + "sha256:2089b9014792dcc87bb1d620cde847913338abf7d957ef05587382b0cb76d44e", + 
"sha256:2eea16883aa7724c95eea0eb473ab585c6cf66f0e28f7f13e63deb38f4fd6d0f", + "sha256:38892a254420d95594285077276162a5e9e9c30b6da08bdc2a4d53331ad9a6fa", + "sha256:4b018ea6f26424a0852eb60eb406420d9f0d34f65736ea7bbfbb104946a66d86", + "sha256:65f877882b7ddede7090c7d87be27a0f4720fe7fc6fddd4409c06e1aa0f1ae8d", + "sha256:666d717a4798eb9c5d3ae83fe80c7bc6ed696b93e879cb01cb24a74155c73612", + "sha256:66b172610db0ececebebb09d146f54205f87c7b841454e408fba854764f91bdd", + "sha256:6db02c5605f063b67780f4d5753476b6a4944343284aa4e93c5e8ff6e9ec7f76", + "sha256:6e0e6b2111165522ad336705499b1f968c34a9e84d05d498ee5af0b5697d1efe", + "sha256:71a1851111f23f82fc43d2b6b2bfdd3f760579a664ebc939576fe21cc6133d01", + "sha256:7a7cb59ebd63a8ac4542ec1c61dd08724f82ec3aa7bb6b4b9e212d43c611ce3d", + "sha256:7baf23adb698d8c6ca7339c9dde00931bc47b2dd82fa912827fef9f93db77f5e", + "sha256:970aa97297537540369d05fe0fd1bb952593f9ab696c9b427c06990a83e2418b", + "sha256:9bac8eb1eccef540d7f4e844b6313d9f7722efd48c07e1b4bfec1056132127fd", + "sha256:a07ff2565da72a7b384a9e000b15b6b8270d81370af8a3531a16f6fbcee023cc", + "sha256:a0dcaf5648cecddc328e81a0421821a1f65a1d517b20746c94a1f0f5c36fb51a", + "sha256:a0ea10faa3bab0714d3a19c7e0921279a68d57552414d6eceaea99f97d7735db", + "sha256:a5b62d1805cc83d755972033c05cea78a1e177a159fc84da5c9c4ab6303ccbd9", + "sha256:a6cef5b31e27c31253c0f852b629a38d550ae66ec6850129c49d872f9ee428cb", + "sha256:a7bf8b05c214d32fb7ca7c001fde70b9b426378e897b0adbf77b85ea3569d56a", + "sha256:ac17a7e7b06ee426a4989f0b7f24ab1a592e39cdf56353a90f4e998bc0bf44d6", + "sha256:b3b687e905da32e5f2e5f16efa713f5d1fcd9fb8b8c697895de35c91fedeb086", + "sha256:b5e439d9e55d645f2a4dca63e2f66d68fe974c405053b132d61c7e98c25dfeb2", + "sha256:ba107add08e12600b072cf3c47aaa1ab85dd4d3c48107a5d3377d1bf80f8b235", + "sha256:d092b7ba63182d2dd427904e3eb58dd5c46ec67c5968de14a4b5007010a3a4cc", + "sha256:dc8c5c23e7056e126275dbf29efba817b3d94196690930d0968873ac3a94ab82", + "sha256:df0042cab69f4d246f4cb8fc297770ac4ae6ec2983f61836b04a117722037dcd", + "sha256:ee3d9ff16d749a9aa521bd7d86f0dbf256b2d2ac8ce31b19e4d2c86d2f2ff0b6", + "sha256:f23fbf70d2e80f4e03a83fc1206a8306d9bc50482fee4239f10676ce7e470c83", + "sha256:ff5d9fe518ad2de14ce82ab906b6ab5c2b0c7f4f984400ff8a7a905daa580a0a" ], - "markers": "python_version >= '3.6'", - "version": "==3.3.2" + "markers": "python_version >= '3.7'", + "version": "==3.5.0" + }, + "memory-profiler": { + "hashes": [ + "sha256:01385ac0fec944fcf7969814ec4406c6d8a9c66c079d09276723c5a7680f44e5" + ], + "index": "pypi", + "version": "==0.58.0" }, "munch": { "hashes": [ @@ -178,6 +236,14 @@ ], "version": "==2.5.0" }, + "natsort": { + "hashes": [ + "sha256:5f5f4ea471d655b1b1611eef1cf0c6d3397095d2d3a1aab7098d6a50e4c3901a", + "sha256:a0a4fd71aee20a6d648da61e01180a63f7268e69983d0440bd3ad80ef1ba6981" + ], + "index": "pypi", + "version": "==8.0.0" + }, "numba": { "hashes": [ "sha256:24852c21fbf7edf9e000eeec9fbd1b24d1ca17c86ae449b06a3707bcdec95479", @@ -205,35 +271,47 @@ }, "numpy": { "hashes": [ - "sha256:04c7d4ebc5ff93d9822075ddb1751ff392a4375e5885299445fcebf877f179d5", - "sha256:0bfd85053d1e9f60234f28f63d4a5147ada7f432943c113a11afcf3e65d9d4c8", - "sha256:0c66da1d202c52051625e55a249da35b31f65a81cb56e4c69af0dfb8fb0125bf", - "sha256:0d310730e1e793527065ad7dde736197b705d0e4c9999775f212b03c44a8484c", - "sha256:1669ec8e42f169ff715a904c9b2105b6640f3f2a4c4c2cb4920ae8b2785dac65", - "sha256:2117536e968abb7357d34d754e3733b0d7113d4c9f1d921f21a3d96dec5ff716", - "sha256:3733640466733441295b0d6d3dcbf8e1ffa7e897d4d82903169529fd3386919a", - 
"sha256:4339741994c775396e1a274dba3609c69ab0f16056c1077f18979bec2a2c2e6e", - "sha256:51ee93e1fac3fe08ef54ff1c7f329db64d8a9c5557e6c8e908be9497ac76374b", - "sha256:54045b198aebf41bf6bf4088012777c1d11703bf74461d70cd350c0af2182e45", - "sha256:58d66a6b3b55178a1f8a5fe98df26ace76260a70de694d99577ddeab7eaa9a9d", - "sha256:59f3d687faea7a4f7f93bd9665e5b102f32f3fa28514f15b126f099b7997203d", - "sha256:62139af94728d22350a571b7c82795b9d59be77fc162414ada6c8b6a10ef5d02", - "sha256:7118f0a9f2f617f921ec7d278d981244ba83c85eea197be7c5a4f84af80a9c3c", - "sha256:7c6646314291d8f5ea900a7ea9c4261f834b5b62159ba2abe3836f4fa6705526", - "sha256:967c92435f0b3ba37a4257c48b8715b76741410467e2bdb1097e8391fccfae15", - "sha256:9a3001248b9231ed73894c773142658bab914645261275f675d86c290c37f66d", - "sha256:aba1d5daf1144b956bc87ffb87966791f5e9f3e1f6fab3d7f581db1f5b598f7a", - "sha256:addaa551b298052c16885fc70408d3848d4e2e7352de4e7a1e13e691abc734c1", - "sha256:b594f76771bc7fc8a044c5ba303427ee67c17a09b36e1fa32bde82f5c419d17a", - "sha256:c35a01777f81e7333bcf276b605f39c872e28295441c265cd0c860f4b40148c1", - "sha256:cebd4f4e64cfe87f2039e4725781f6326a61f095bc77b3716502bed812b385a9", - "sha256:d526fa58ae4aead839161535d59ea9565863bb0b0bdb3cc63214613fb16aced4", - "sha256:d7ac33585e1f09e7345aa902c281bd777fdb792432d27fca857f39b70e5dd31c", - "sha256:e6ddbdc5113628f15de7e4911c02aed74a4ccff531842c583e5032f6e5a179bd", - "sha256:eb25c381d168daf351147713f49c626030dcff7a393d5caa62515d415a6071d8" + "sha256:0b78ecfa070460104934e2caf51694ccd00f37d5e5dbe76f021b1b0b0d221823", + "sha256:1247ef28387b7bb7f21caf2dbe4767f4f4175df44d30604d42ad9bd701ebb31f", + "sha256:1403b4e2181fc72664737d848b60e65150f272fe5a1c1cbc16145ed43884065a", + "sha256:170b2a0805c6891ca78c1d96ee72e4c3ed1ae0a992c75444b6ab20ff038ba2cd", + "sha256:2e4ed57f45f0aa38beca2a03b6532e70e548faf2debbeb3291cfc9b315d9be8f", + "sha256:32fe5b12061f6446adcbb32cf4060a14741f9c21e15aaee59a207b6ce6423469", + "sha256:34f3456f530ae8b44231c63082c8899fe9c983fd9b108c997c4b1c8c2d435333", + "sha256:4c9c23158b87ed0e70d9a50c67e5c0b3f75bcf2581a8e34668d4e9d7474d76c6", + "sha256:5d95668e727c75b3f5088ec7700e260f90ec83f488e4c0aaccb941148b2cd377", + "sha256:615d4e328af7204c13ae3d4df7615a13ff60a49cb0d9106fde07f541207883ca", + "sha256:69077388c5a4b997442b843dbdc3a85b420fb693ec8e33020bb24d647c164fa5", + "sha256:74b85a17528ca60cf98381a5e779fc0264b4a88b46025e6bcbe9621f46bb3e63", + "sha256:81225e58ef5fce7f1d80399575576fc5febec79a8a2742e8ef86d7b03beef49f", + "sha256:8890b3360f345e8360133bc078d2dacc2843b6ee6059b568781b15b97acbe39f", + "sha256:92aafa03da8658609f59f18722b88f0a73a249101169e28415b4fa148caf7e41", + "sha256:9864424631775b0c052f3bd98bc2712d131b3e2cd95d1c0c68b91709170890b0", + "sha256:9e6f5f50d1eff2f2f752b3089a118aee1ea0da63d56c44f3865681009b0af162", + "sha256:a3deb31bc84f2b42584b8c4001c85d1934dbfb4030827110bc36bfd11509b7bf", + "sha256:ad010846cdffe7ec27e3f933397f8a8d6c801a48634f419e3d075db27acf5880", + "sha256:b1e2312f5b8843a3e4e8224b2b48fe16119617b8fc0a54df8f50098721b5bed2", + "sha256:bc988afcea53e6156546e5b2885b7efab089570783d9d82caf1cfd323b0bb3dd", + "sha256:c449eb870616a7b62e097982c622d2577b3dbc800aaf8689254ec6e0197cbf1e", + "sha256:c74c699b122918a6c4611285cc2cad4a3aafdb135c22a16ec483340ef97d573c", + "sha256:c885bfc07f77e8fee3dc879152ba993732601f1f11de248d4f357f0ffea6a6d4", + "sha256:e3c3e990274444031482a31280bf48674441e0a5b55ddb168f3a6db3e0c38ec8", + "sha256:e4799be6a2d7d3c33699a6f77201836ac975b2e1b98c2a07f66a38f499cb50ce", + "sha256:e6c76a87633aa3fa16614b61ccedfae45b91df2767cf097aa9c933932a7ed1e0", + 
"sha256:e89717274b41ebd568cd7943fc9418eeb49b1785b66031bc8a7f6300463c5898", + "sha256:f5162ec777ba7138906c9c274353ece5603646c6965570d82905546579573f73", + "sha256:fde96af889262e85aa033f8ee1d3241e32bf36228318a61f1ace579df4e8170d" + ], + "markers": "python_version < '3.11' and python_version >= '3.7'", + "version": "==1.21.4" + }, + "packaging": { + "hashes": [ + "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", + "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" ], "markers": "python_version >= '3.6'", - "version": "==1.19.2" + "version": "==21.3" }, "pandas": { "hashes": [ @@ -259,37 +337,84 @@ }, "pillow": { "hashes": [ - "sha256:006de60d7580d81f4a1a7e9f0173dc90a932e3905cc4d47ea909bc946302311a", - "sha256:0a2e8d03787ec7ad71dc18aec9367c946ef8ef50e1e78c71f743bc3a770f9fae", - "sha256:0eeeae397e5a79dc088d8297a4c2c6f901f8fb30db47795113a4a605d0f1e5ce", - "sha256:11c5c6e9b02c9dac08af04f093eb5a2f84857df70a7d4a6a6ad461aca803fb9e", - "sha256:2fb113757a369a6cdb189f8df3226e995acfed0a8919a72416626af1a0a71140", - "sha256:4b0ef2470c4979e345e4e0cc1bbac65fda11d0d7b789dbac035e4c6ce3f98adb", - "sha256:59e903ca800c8cfd1ebe482349ec7c35687b95e98cefae213e271c8c7fffa021", - "sha256:5abd653a23c35d980b332bc0431d39663b1709d64142e3652890df4c9b6970f6", - "sha256:5f9403af9c790cc18411ea398a6950ee2def2a830ad0cfe6dc9122e6d528b302", - "sha256:6b4a8fd632b4ebee28282a9fef4c341835a1aa8671e2770b6f89adc8e8c2703c", - "sha256:6c1aca8231625115104a06e4389fcd9ec88f0c9befbabd80dc206c35561be271", - "sha256:795e91a60f291e75de2e20e6bdd67770f793c8605b553cb6e4387ce0cb302e09", - "sha256:7ba0ba61252ab23052e642abdb17fd08fdcfdbbf3b74c969a30c58ac1ade7cd3", - "sha256:7c9401e68730d6c4245b8e361d3d13e1035cbc94db86b49dc7da8bec235d0015", - "sha256:81f812d8f5e8a09b246515fac141e9d10113229bc33ea073fec11403b016bcf3", - "sha256:895d54c0ddc78a478c80f9c438579ac15f3e27bf442c2a9aa74d41d0e4d12544", - "sha256:8de332053707c80963b589b22f8e0229f1be1f3ca862a932c1bcd48dafb18dd8", - "sha256:92c882b70a40c79de9f5294dc99390671e07fc0b0113d472cbea3fde15db1792", - "sha256:95edb1ed513e68bddc2aee3de66ceaf743590bf16c023fb9977adc4be15bd3f0", - "sha256:b63d4ff734263ae4ce6593798bcfee6dbfb00523c82753a3a03cbc05555a9cc3", - "sha256:bd7bf289e05470b1bc74889d1466d9ad4a56d201f24397557b6f65c24a6844b8", - "sha256:cc3ea6b23954da84dbee8025c616040d9aa5eaf34ea6895a0a762ee9d3e12e11", - "sha256:cc9ec588c6ef3a1325fa032ec14d97b7309db493782ea8c304666fb10c3bd9a7", - "sha256:d3d07c86d4efa1facdf32aa878bd508c0dc4f87c48125cc16b937baa4e5b5e11", - "sha256:d8a96747df78cda35980905bf26e72960cba6d355ace4780d4bdde3b217cdf1e", - "sha256:e38d58d9138ef972fceb7aeec4be02e3f01d383723965bfcef14d174c8ccd039", - "sha256:eb472586374dc66b31e36e14720747595c2b265ae962987261f044e5cce644b5", - "sha256:fbd922f702582cb0d71ef94442bfca57624352622d75e3be7a1e7e9360b07e72" + "sha256:066f3999cb3b070a95c3652712cffa1a748cd02d60ad7b4e485c3748a04d9d76", + "sha256:0a0956fdc5defc34462bb1c765ee88d933239f9a94bc37d132004775241a7585", + "sha256:0b052a619a8bfcf26bd8b3f48f45283f9e977890263e4571f2393ed8898d331b", + "sha256:1394a6ad5abc838c5cd8a92c5a07535648cdf6d09e8e2d6df916dfa9ea86ead8", + "sha256:1bc723b434fbc4ab50bb68e11e93ce5fb69866ad621e3c2c9bdb0cd70e345f55", + "sha256:244cf3b97802c34c41905d22810846802a3329ddcb93ccc432870243211c79fc", + "sha256:25a49dc2e2f74e65efaa32b153527fc5ac98508d502fa46e74fa4fd678ed6645", + "sha256:2e4440b8f00f504ee4b53fe30f4e381aae30b0568193be305256b1462216feff", + "sha256:3862b7256046fcd950618ed22d1d60b842e3a40a48236a5498746f21189afbbc", + 
"sha256:3eb1ce5f65908556c2d8685a8f0a6e989d887ec4057326f6c22b24e8a172c66b", + "sha256:3f97cfb1e5a392d75dd8b9fd274d205404729923840ca94ca45a0af57e13dbe6", + "sha256:493cb4e415f44cd601fcec11c99836f707bb714ab03f5ed46ac25713baf0ff20", + "sha256:4acc0985ddf39d1bc969a9220b51d94ed51695d455c228d8ac29fcdb25810e6e", + "sha256:5503c86916d27c2e101b7f71c2ae2cddba01a2cf55b8395b0255fd33fa4d1f1a", + "sha256:5b7bb9de00197fb4261825c15551adf7605cf14a80badf1761d61e59da347779", + "sha256:5e9ac5f66616b87d4da618a20ab0a38324dbe88d8a39b55be8964eb520021e02", + "sha256:620582db2a85b2df5f8a82ddeb52116560d7e5e6b055095f04ad828d1b0baa39", + "sha256:62cc1afda735a8d109007164714e73771b499768b9bb5afcbbee9d0ff374b43f", + "sha256:70ad9e5c6cb9b8487280a02c0ad8a51581dcbbe8484ce058477692a27c151c0a", + "sha256:72b9e656e340447f827885b8d7a15fc8c4e68d410dc2297ef6787eec0f0ea409", + "sha256:72cbcfd54df6caf85cc35264c77ede902452d6df41166010262374155947460c", + "sha256:792e5c12376594bfcb986ebf3855aa4b7c225754e9a9521298e460e92fb4a488", + "sha256:7b7017b61bbcdd7f6363aeceb881e23c46583739cb69a3ab39cb384f6ec82e5b", + "sha256:81f8d5c81e483a9442d72d182e1fb6dcb9723f289a57e8030811bac9ea3fef8d", + "sha256:82aafa8d5eb68c8463b6e9baeb4f19043bb31fefc03eb7b216b51e6a9981ae09", + "sha256:84c471a734240653a0ec91dec0996696eea227eafe72a33bd06c92697728046b", + "sha256:8c803ac3c28bbc53763e6825746f05cc407b20e4a69d0122e526a582e3b5e153", + "sha256:93ce9e955cc95959df98505e4608ad98281fff037350d8c2671c9aa86bcf10a9", + "sha256:9a3e5ddc44c14042f0844b8cf7d2cd455f6cc80fd7f5eefbe657292cf601d9ad", + "sha256:a4901622493f88b1a29bd30ec1a2f683782e57c3c16a2dbc7f2595ba01f639df", + "sha256:a5a4532a12314149d8b4e4ad8ff09dde7427731fcfa5917ff16d0291f13609df", + "sha256:b8831cb7332eda5dc89b21a7bce7ef6ad305548820595033a4b03cf3091235ed", + "sha256:b8e2f83c56e141920c39464b852de3719dfbfb6e3c99a2d8da0edf4fb33176ed", + "sha256:c70e94281588ef053ae8998039610dbd71bc509e4acbc77ab59d7d2937b10698", + "sha256:c8a17b5d948f4ceeceb66384727dde11b240736fddeda54ca740b9b8b1556b29", + "sha256:d82cdb63100ef5eedb8391732375e6d05993b765f72cb34311fab92103314649", + "sha256:d89363f02658e253dbd171f7c3716a5d340a24ee82d38aab9183f7fdf0cdca49", + "sha256:d99ec152570e4196772e7a8e4ba5320d2d27bf22fdf11743dd882936ed64305b", + "sha256:ddc4d832a0f0b4c52fff973a0d44b6c99839a9d016fe4e6a1cb8f3eea96479c2", + "sha256:e3dacecfbeec9a33e932f00c6cd7996e62f53ad46fbe677577394aaa90ee419a", + "sha256:eb9fc393f3c61f9054e1ed26e6fe912c7321af2f41ff49d3f83d05bacf22cc78" ], "markers": "python_version >= '3.6'", - "version": "==8.0.1" + "version": "==8.4.0" + }, + "psutil": { + "hashes": [ + "sha256:0066a82f7b1b37d334e68697faba68e5ad5e858279fd6351c8ca6024e8d6ba64", + "sha256:02b8292609b1f7fcb34173b25e48d0da8667bc85f81d7476584d889c6e0f2131", + "sha256:0ae6f386d8d297177fd288be6e8d1afc05966878704dad9847719650e44fc49c", + "sha256:0c9ccb99ab76025f2f0bbecf341d4656e9c1351db8cc8a03ccd62e318ab4b5c6", + "sha256:0dd4465a039d343925cdc29023bb6960ccf4e74a65ad53e768403746a9207023", + "sha256:12d844996d6c2b1d3881cfa6fa201fd635971869a9da945cf6756105af73d2df", + "sha256:1bff0d07e76114ec24ee32e7f7f8d0c4b0514b3fae93e3d2aaafd65d22502394", + "sha256:245b5509968ac0bd179287d91210cd3f37add77dad385ef238b275bad35fa1c4", + "sha256:28ff7c95293ae74bf1ca1a79e8805fcde005c18a122ca983abf676ea3466362b", + "sha256:36b3b6c9e2a34b7d7fbae330a85bf72c30b1c827a4366a07443fc4b6270449e2", + "sha256:52de075468cd394ac98c66f9ca33b2f54ae1d9bff1ef6b67a212ee8f639ec06d", + "sha256:5da29e394bdedd9144c7331192e20c1f79283fb03b06e6abd3a8ae45ffecee65", + 
"sha256:61f05864b42fedc0771d6d8e49c35f07efd209ade09a5afe6a5059e7bb7bf83d", + "sha256:6223d07a1ae93f86451d0198a0c361032c4c93ebd4bf6d25e2fb3edfad9571ef", + "sha256:6323d5d845c2785efb20aded4726636546b26d3b577aded22492908f7c1bdda7", + "sha256:6ffe81843131ee0ffa02c317186ed1e759a145267d54fdef1bc4ea5f5931ab60", + "sha256:74f2d0be88db96ada78756cb3a3e1b107ce8ab79f65aa885f76d7664e56928f6", + "sha256:74fb2557d1430fff18ff0d72613c5ca30c45cdbfcddd6a5773e9fc1fe9364be8", + "sha256:90d4091c2d30ddd0a03e0b97e6a33a48628469b99585e2ad6bf21f17423b112b", + "sha256:90f31c34d25b1b3ed6c40cdd34ff122b1887a825297c017e4cbd6796dd8b672d", + "sha256:99de3e8739258b3c3e8669cb9757c9a861b2a25ad0955f8e53ac662d66de61ac", + "sha256:c6a5fd10ce6b6344e616cf01cc5b849fa8103fbb5ba507b6b2dee4c11e84c935", + "sha256:ce8b867423291cb65cfc6d9c4955ee9bfc1e21fe03bb50e177f2b957f1c2469d", + "sha256:d225cd8319aa1d3c85bf195c4e07d17d3cd68636b8fc97e6cf198f782f99af28", + "sha256:ea313bb02e5e25224e518e4352af4bf5e062755160f77e4b1767dd5ccb65f876", + "sha256:ea372bcc129394485824ae3e3ddabe67dc0b118d262c568b4d2602a7070afdb0", + "sha256:f4634b033faf0d968bb9220dd1c793b897ab7f1189956e1aa9eae752527127d3", + "sha256:fcc01e900c1d7bee2a37e5d6e4f9194760a93597c97fee89c4ae51701de03563" + ], + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==5.8.0" }, "pygeos": { "hashes": [ @@ -315,55 +440,59 @@ }, "pyparsing": { "hashes": [ - "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", - "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" + "sha256:04ff808a5b90911829c55c4e26f75fa5ca8a2f5f36aa3a51f68e27033341d3e4", + "sha256:d9bdec0013ef1eb5a84ab39a3b3868911598afa494f5faa038647101504e2b81" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.4.7" + "markers": "python_version >= '3.6'", + "version": "==3.0.6" }, "pyproj": { "hashes": [ - "sha256:2518d1606e2229b82318e704b40290e02a2a52d77b40cdcb2978973d6fc27b20", - "sha256:33a5d1cfbb40a019422eb80709a0e270704390ecde7278fdc0b88f3647c56a39", - "sha256:33c1c2968a4f4f87d517c4275a18b557e5c13907cf2609371fadea8463c3ba05", - "sha256:3fef83a01c1e86dd9fa99d8214f749837cfafc34d9d6230b4b0a998fa7a68a1a", - "sha256:451a3d1c563b672458029ebc04acbb3266cd8b3025268eb871a9176dc3638911", - "sha256:457ad3856014ac26af1d86def6dc8cf69c1fa377b6e2fd6e97912d51cf66bdbe", - "sha256:4f5b02b4abbd41610397c635b275a8ee4a2b5bc72a75572b98ac6ae7befa471e", - "sha256:6a212d0e5c7efa33d039f0c8b0a489e2204fcd28b56206567852ad7f5f2a653e", - "sha256:6f3f36440ea61f5f6da4e6beb365dddcbe159815450001d9fb753545affa45ff", - "sha256:93cbad7b699e8e80def7de80c350617f35e6a0b82862f8ce3c014657c25fdb3c", - "sha256:9f097e8f341a162438918e908be86d105a28194ff6224633b2e9616c5031153f", - "sha256:a13e5731b3a360ee7fbd1e9199ec9203fafcece8ebd0b1351f16d0a90cad6828", - "sha256:a6ac4861979cd05a0f5400fefa41d26c0269a5fb8237618aef7c998907db39e1", - "sha256:a8b7c8accdc61dac8e91acab7c1f7b4590d1e102f2ee9b1f1e6399fad225958e", - "sha256:adacb67a9f71fb54ca1b887a6ab20f32dd536fcdf2acec84a19e25ad768f7965", - "sha256:bc2f3a15d065e206d63edd2cc4739aa0a35c05338ee276ab1dc72f56f1944bda", - "sha256:cbf6ccf990860b06c5262ff97c4b78e1d07883981635cd53a6aa438a68d92945", - "sha256:d87836be6b720fb4d9c112136aa47621b6ca09a554e645c1081561eb8e2fa1f4", - "sha256:d90a5d1fdd066b0e9b22409b0f5e81933469918fa04c2cf7f9a76ce84cb29dad", - "sha256:daf2998e3f5bcdd579a18faf009f37f53538e9b7d0a252581a610297d31e8536", - "sha256:e015f900b4b84e908f8035ab16ebf02d67389c1c216c17a2196fc2e515c00762", - 
"sha256:e50d5d20b87758acf8f13f39a3b3eb21d5ef32339d2bc8cdeb8092416e0051df", - "sha256:f5a8015c74ec8f6508aebf493b58ba20ccb4da8168bf05f0c2a37faccb518da9" + "sha256:04c185102e659439c5bd428ac5473d36ef795fca8e225bbbe78e20643d804ec0", + "sha256:10dad599b9f7ce2194996dc25f1000e0aa15754ecef9db46b624713959c67957", + "sha256:1e88ebc4e08e661e9011b5c1ebfb32f0d311963a9824a6effb4168c7e07918b1", + "sha256:4f3ad09cf3352bf5664794042b28d98781362ec8d9774ad73f28a1a0101a27f1", + "sha256:5f8a8d982bde211e65dc2de1f8f36cf162f9cc7fcd8a7625046ea265284e5e65", + "sha256:67b94f4e694ae33fc90dfb7da0e6b5ed5f671dd0acc2f6cf46e9c39d56e16e1a", + "sha256:808f5992320e9631b2e45444028a65cd6ba3ee40229292934178ef07020a5ffd", + "sha256:8eda240225971b5cd0bac2d399ed6222068f0598ee92d5f6e847bd2019d2c8b0", + "sha256:911d773da9fa4d4f3f7580173858c391e3ee0b61acaf0be303baab323d2eae78", + "sha256:9cc464a1c51baad28ffb7a233116e8d4ce4c560b32039fa986d0f992ac3c431f", + "sha256:a162ed199cd2ec392cffe20b2fa3381b68e7a166d55f3f060eceb8d517e4f46d", + "sha256:aa87df0982aa0f4477478899d9c930cc0f97cd6d8a4ce84c43ac88ccf86d1da7", + "sha256:ae237492767e0225f99b53a0fd7110fde2b7e7cabc105bbc243c151a7497de88", + "sha256:ae5534fa7a3b74f20534694d297fce6f7483890ff6ca404394ecf372f3c589d4", + "sha256:b635e7e21fea5af74e90fc9e54d1a4c27078efdce6f214101c98dd93afae599a", + "sha256:b6c74bbec679199746a3e02c0e0fad093c3652df96dd63e086a2fbf2afe9dc0e", + "sha256:c4193e1069d165476b2d0f7d882b7712b3eab6e2e6fe2a0a78ef40de825a1f28", + "sha256:da88abc5e2f6a8fb07533855a57ca2a31845f58901a87f821b68b0db6b023978", + "sha256:ebbba7707fe83a01e54bce8e3e7342feb0b3e0d74ff8c28df12f8bc59b76827c" ], - "markers": "python_version >= '3.5'", - "version": "==2.6.1.post1" + "index": "pypi", + "version": "==3.1.0" }, "python-dateutil": { "hashes": [ - "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c", - "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a" + "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.8.1" + "version": "==2.8.2" + }, + "python-dotenv": { + "hashes": [ + "sha256:32b2bdc1873fd3a3c346da1c6db83d0053c3c62f28f1f38516070c4c8971b1d3", + "sha256:a5de49a31e953b45ff2d2fd434bbc2670e8db5273606c1e737cc6b93eff3655f" + ], + "index": "pypi", + "version": "==0.19.2" }, "pytz": { "hashes": [ - "sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed", - "sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048" + "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c", + "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326" ], - "version": "==2020.1" + "version": "==2021.3" }, "rasterio": { "hashes": [ @@ -410,28 +539,33 @@ }, "scipy": { "hashes": [ - "sha256:07b083128beae040f1129bd8a82b01804f5e716a7fd2962c1053fa683433e4ab", - "sha256:0edd67e8a00903aaf7a29c968555a2e27c5a69fea9d1dcfffda80614281a884f", - "sha256:12fdcbfa56cac926a0a9364a30cbf4ad03c2c7b59f75b14234656a5e4fd52bf3", - "sha256:1fee28b6641ecbff6e80fe7788e50f50c5576157d278fa40f36c851940eb0aff", - "sha256:33e6a7439f43f37d4c1135bc95bcd490ffeac6ef4b374892c7005ce2c729cf4a", - "sha256:5163200ab14fd2b83aba8f0c4ddcc1fa982a43192867264ab0f4c8065fd10d17", - "sha256:66ec29348444ed6e8a14c9adc2de65e74a8fc526dc2c770741725464488ede1f", - "sha256:8cc5c39ed287a8b52a5509cd6680af078a40b0e010e2657eca01ffbfec929468", - 
"sha256:a1a13858b10d41beb0413c4378462b43eafef88a1948d286cb357eadc0aec024", - "sha256:a3db1fe7c6cb29ca02b14c9141151ebafd11e06ffb6da8ecd330eee5c8283a8a", - "sha256:aebb69bcdec209d874fc4b0c7ac36f509d50418a431c1422465fa34c2c0143ea", - "sha256:b9751b39c52a3fa59312bd2e1f40144ee26b51404db5d2f0d5259c511ff6f614", - "sha256:bc0e63daf43bf052aefbbd6c5424bc03f629d115ece828e87303a0bcc04a37e4", - "sha256:d5e3cc60868f396b78fc881d2c76460febccfe90f6d2f082b9952265c79a8788", - "sha256:ddae76784574cc4c172f3d5edd7308be16078dd3b977e8746860c76c195fa707", - "sha256:e2602f79c85924e4486f684aa9bbab74afff90606100db88d0785a0088be7edb", - "sha256:e527c9221b6494bcd06a17f9f16874406b32121385f9ab353b8a9545be458f0b", - "sha256:f574558f1b774864516f3c3fe072ebc90a29186f49b720f60ed339294b7f32ac", - "sha256:ffcbd331f1ffa82e22f1d408e93c37463c9a83088243158635baec61983aaacf" - ], - "markers": "python_version >= '3.6'", - "version": "==1.5.3" + "sha256:1437073f1d4664990879aa8f9547524764372e0fef84a077be4b19e82bba7a8d", + "sha256:17fd991a275e4283453f89d404209aa92059ac68d76d804b4bc1716a3742e1b5", + "sha256:1ea6233f5a365cb7945b4304bd06323ece3ece85d6a3fa8598d2f53e513467c9", + "sha256:2d25272c03ee3c0fe5e0dff1bb7889280bb6c9e1766fa9c7bde81ad8a5f78694", + "sha256:30bdda199667e74b50208a793eb1ba47a04e5e3fa16f5ff06c6f7969ae78e4da", + "sha256:359b60a0cccd17723b9d5e329a5212a710e771a3ddde800e472fb93732756c46", + "sha256:39f838ea5ce8da868785193d88d05cf5a6d5c390804ec99de29a28e1dcdd53e6", + "sha256:4d175ba93e00d8eef8f7cd70d4d88a9106a86800c82ea03cf2268c36d6545483", + "sha256:5273d832fb9cd5724ee0d335c16a903b923441107dd973d27fc4293075a9f4e3", + "sha256:54951f51d731c832b1b8885e0a92e89f33d087de7e40d02078bf0d49c7cbdbb5", + "sha256:74f518ce542533054695f743e4271cb8986b63f95bb51d70fcee4f3929cbff7d", + "sha256:7b1d0f5f524518f1a86f288443528e4ff4a739c0966db663af4129b7ac7849f8", + "sha256:82c5befebf54d799d77e5f0205c03030f57f69ba2541baa44d2e6ad138c28cd3", + "sha256:8482c8e45857ab0a5446eb7460d2307a27cbbe659d6d2257820c6d6eb950fd0f", + "sha256:87cf3964db0f1cce17aeed5bfc1b89a6b4b07dbfc48e50d21fa3549e00456803", + "sha256:8b5726a0fedeaa6beb1095e4466998bdd1d1e960b28db9b5a16c89cbd7b2ebf1", + "sha256:97eb573e361a73a553b915dc195c6f72a08249964b1a33f157f9659f3b6210d1", + "sha256:a80eb01c43fd98257ec7a49ff5cec0edba32031b5f86503f55399a48cb2c5379", + "sha256:cac71d5476a6f56b50459da21f6221707e0051ebd428b2137db32ef4a43bb15e", + "sha256:d86abd1ddf421dea5e9cebfeb4de0d205b3dc04e78249afedba9c6c3b2227ff2", + "sha256:dc2d1bf41294e63c7302bf499973ac0c7f73c93c01763db43055f6525234bf11", + "sha256:e08b81fcd9bf98740b58dc6fdd7879e33a64dcb682201c1135f7d4a75216bb05", + "sha256:e3efe7ef75dfe627b354ab0af0dbc918eadee97cc80ff1aabea6d3e01114ebdd", + "sha256:fa2dbabaaecdb502641b0b3c00dec05fb475ae48655c66da16c9ed24eda1e711" + ], + "markers": "python_version < '3.11' and python_version >= '3.7'", + "version": "==1.7.2" }, "seaborn": { "hashes": [ @@ -441,6 +575,14 @@ "index": "pypi", "version": "==0.11.0" }, + "setuptools-scm": { + "hashes": [ + "sha256:4c64444b1d49c4063ae60bfe1680f611c8b13833d556fd1d6050c0023162a119", + "sha256:a49aa8081eeb3514eb9728fa5040f2eaa962d6c6f4ec9c32f6c1fba88f88a0f2" + ], + "markers": "python_version >= '3.6'", + "version": "==6.3.2" + }, "shapely": { "hashes": [ "sha256:11090bd5b5f11d54e1924a11198226971dab6f392c2e5a3c74514857f764b971", @@ -468,62 +610,78 @@ }, "simplejson": { "hashes": [ - "sha256:034550078a11664d77bc1a8364c90bb7eef0e44c2dbb1fd0a4d92e3997088667", - "sha256:05b43d568300c1cd43f95ff4bfcff984bc658aa001be91efb3bb21df9d6288d3", - 
"sha256:0dd9d9c738cb008bfc0862c9b8fa6743495c03a0ed543884bf92fb7d30f8d043", - "sha256:10fc250c3edea4abc15d930d77274ddb8df4803453dde7ad50c2f5565a18a4bb", - "sha256:2862beabfb9097a745a961426fe7daf66e1714151da8bb9a0c430dde3d59c7c0", - "sha256:292c2e3f53be314cc59853bd20a35bf1f965f3bc121e007ab6fd526ed412a85d", - "sha256:2d3eab2c3fe52007d703a26f71cf649a8c771fcdd949a3ae73041ba6797cfcf8", - "sha256:2e7b57c2c146f8e4dadf84977a83f7ee50da17c8861fd7faf694d55e3274784f", - "sha256:311f5dc2af07361725033b13cc3d0351de3da8bede3397d45650784c3f21fbcf", - "sha256:344e2d920a7f27b4023c087ab539877a1e39ce8e3e90b867e0bfa97829824748", - "sha256:3fabde09af43e0cbdee407555383063f8b45bfb52c361bc5da83fcffdb4fd278", - "sha256:42b8b8dd0799f78e067e2aaae97e60d58a8f63582939af60abce4c48631a0aa4", - "sha256:4b3442249d5e3893b90cb9f72c7d6ce4d2ea144d2c0d9f75b9ae1e5460f3121a", - "sha256:55d65f9cc1b733d85ef95ab11f559cce55c7649a2160da2ac7a078534da676c8", - "sha256:5c659a0efc80aaaba57fcd878855c8534ecb655a28ac8508885c50648e6e659d", - "sha256:72d8a3ffca19a901002d6b068cf746be85747571c6a7ba12cbcf427bfb4ed971", - "sha256:75ecc79f26d99222a084fbdd1ce5aad3ac3a8bd535cd9059528452da38b68841", - "sha256:76ac9605bf2f6d9b56abf6f9da9047a8782574ad3531c82eae774947ae99cc3f", - "sha256:7d276f69bfc8c7ba6c717ba8deaf28f9d3c8450ff0aa8713f5a3280e232be16b", - "sha256:7f10f8ba9c1b1430addc7dd385fc322e221559d3ae49b812aebf57470ce8de45", - "sha256:8042040af86a494a23c189b5aa0ea9433769cc029707833f261a79c98e3375f9", - "sha256:813846738277729d7db71b82176204abc7fdae2f566e2d9fcf874f9b6472e3e6", - "sha256:845a14f6deb124a3bcb98a62def067a67462a000e0508f256f9c18eff5847efc", - "sha256:869a183c8e44bc03be1b2bbcc9ec4338e37fa8557fc506bf6115887c1d3bb956", - "sha256:8acf76443cfb5c949b6e781c154278c059b09ac717d2757a830c869ba000cf8d", - "sha256:8f713ea65958ef40049b6c45c40c206ab363db9591ff5a49d89b448933fa5746", - "sha256:934115642c8ba9659b402c8bdbdedb48651fb94b576e3b3efd1ccb079609b04a", - "sha256:9551f23e09300a9a528f7af20e35c9f79686d46d646152a0c8fc41d2d074d9b0", - "sha256:9a2b7543559f8a1c9ed72724b549d8cc3515da7daf3e79813a15bdc4a769de25", - "sha256:a55c76254d7cf8d4494bc508e7abb993a82a192d0db4552421e5139235604625", - "sha256:ad8f41c2357b73bc9e8606d2fa226233bf4d55d85a8982ecdfd55823a6959995", - "sha256:af4868da7dd53296cd7630687161d53a7ebe2e63814234631445697bd7c29f46", - "sha256:afebfc3dd3520d37056f641969ce320b071bc7a0800639c71877b90d053e087f", - "sha256:b59aa298137ca74a744c1e6e22cfc0bf9dca3a2f41f51bc92eb05695155d905a", - "sha256:bc00d1210567a4cdd215ac6e17dc00cb9893ee521cee701adfd0fa43f7c73139", - "sha256:c1cb29b1fced01f97e6d5631c3edc2dadb424d1f4421dad079cb13fc97acb42f", - "sha256:c94dc64b1a389a416fc4218cd4799aa3756f25940cae33530a4f7f2f54f166da", - "sha256:ceaa28a5bce8a46a130cd223e895080e258a88d51bf6e8de2fc54a6ef7e38c34", - "sha256:cff6453e25204d3369c47b97dd34783ca820611bd334779d22192da23784194b", - "sha256:d0b64409df09edb4c365d95004775c988259efe9be39697d7315c42b7a5e7e94", - "sha256:d4813b30cb62d3b63ccc60dd12f2121780c7a3068db692daeb90f989877aaf04", - "sha256:da3c55cdc66cfc3fffb607db49a42448785ea2732f055ac1549b69dcb392663b", - "sha256:e058c7656c44fb494a11443191e381355388443d543f6fc1a245d5d238544396", - "sha256:fed0f22bf1313ff79c7fc318f7199d6c2f96d4de3234b2f12a1eab350e597c06", - "sha256:ffd4e4877a78c84d693e491b223385e0271278f5f4e1476a4962dca6824ecfeb" + "sha256:04e31fa6ac8e326480703fb6ded1488bfa6f1d3f760d32e29dbf66d0838982ce", + "sha256:068670af975247acbb9fc3d5393293368cda17026db467bf7a51548ee8f17ee1", + "sha256:07ecaafc1b1501f275bf5acdee34a4ad33c7c24ede287183ea77a02dc071e0c0", + 
"sha256:0b4126cac7d69ac06ff22efd3e0b3328a4a70624fcd6bca4fc1b4e6d9e2e12bf", + "sha256:0de783e9c2b87bdd75b57efa2b6260c24b94605b5c9843517577d40ee0c3cc8a", + "sha256:12133863178a8080a3dccbf5cb2edfab0001bc41e5d6d2446af2a1131105adfe", + "sha256:1c9b1ed7ed282b36571638297525f8ef80f34b3e2d600a56f962c6044f24200d", + "sha256:23fe704da910ff45e72543cbba152821685a889cf00fc58d5c8ee96a9bad5f94", + "sha256:28221620f4dcabdeac310846629b976e599a13f59abb21616356a85231ebd6ad", + "sha256:35a49ebef25f1ebdef54262e54ae80904d8692367a9f208cdfbc38dbf649e00a", + "sha256:37bc0cf0e5599f36072077e56e248f3336917ded1d33d2688624d8ed3cefd7d2", + "sha256:3fe87570168b2ae018391e2b43fbf66e8593a86feccb4b0500d134c998983ccc", + "sha256:3ff5b3464e1ce86a8de8c88e61d4836927d5595c2162cab22e96ff551b916e81", + "sha256:401d40969cee3df7bda211e57b903a534561b77a7ade0dd622a8d1a31eaa8ba7", + "sha256:4b6bd8144f15a491c662f06814bd8eaa54b17f26095bb775411f39bacaf66837", + "sha256:4c09868ddb86bf79b1feb4e3e7e4a35cd6e61ddb3452b54e20cf296313622566", + "sha256:4d1c135af0c72cb28dd259cf7ba218338f4dc027061262e46fe058b4e6a4c6a3", + "sha256:4ff4ac6ff3aa8f814ac0f50bf218a2e1a434a17aafad4f0400a57a8cc62ef17f", + "sha256:521877c7bd060470806eb6335926e27453d740ac1958eaf0d8c00911bc5e1802", + "sha256:522fad7be85de57430d6d287c4b635813932946ebf41b913fe7e880d154ade2e", + "sha256:5540fba2d437edaf4aa4fbb80f43f42a8334206ad1ad3b27aef577fd989f20d9", + "sha256:5d6b4af7ad7e4ac515bc6e602e7b79e2204e25dbd10ab3aa2beef3c5a9cad2c7", + "sha256:5decdc78849617917c206b01e9fc1d694fd58caa961be816cb37d3150d613d9a", + "sha256:632ecbbd2228575e6860c9e49ea3cc5423764d5aa70b92acc4e74096fb434044", + "sha256:65b998193bd7b0c7ecdfffbc825d808eac66279313cb67d8892bb259c9d91494", + "sha256:67093a526e42981fdd954868062e56c9b67fdd7e712616cc3265ad0c210ecb51", + "sha256:681eb4d37c9a9a6eb9b3245a5e89d7f7b2b9895590bb08a20aa598c1eb0a1d9d", + "sha256:69bd56b1d257a91e763256d63606937ae4eb890b18a789b66951c00062afec33", + "sha256:724c1fe135aa437d5126138d977004d165a3b5e2ee98fc4eb3e7c0ef645e7e27", + "sha256:7255a37ff50593c9b2f1afa8fafd6ef5763213c1ed5a9e2c6f5b9cc925ab979f", + "sha256:743cd768affaa508a21499f4858c5b824ffa2e1394ed94eb85caf47ac0732198", + "sha256:80d3bc9944be1d73e5b1726c3bbfd2628d3d7fe2880711b1eb90b617b9b8ac70", + "sha256:82ff356ff91be0ab2293fc6d8d262451eb6ac4fd999244c4b5f863e049ba219c", + "sha256:8e8607d8f6b4f9d46fee11447e334d6ab50e993dd4dbfb22f674616ce20907ab", + "sha256:97202f939c3ff341fc3fa84d15db86156b1edc669424ba20b0a1fcd4a796a045", + "sha256:9b01e7b00654115965a206e3015f0166674ec1e575198a62a977355597c0bef5", + "sha256:9fa621b3c0c05d965882c920347b6593751b7ab20d8fa81e426f1735ca1a9fc7", + "sha256:a1aa6e4cae8e3b8d5321be4f51c5ce77188faf7baa9fe1e78611f93a8eed2882", + "sha256:a2d30d6c1652140181dc6861f564449ad71a45e4f165a6868c27d36745b65d40", + "sha256:a649d0f66029c7eb67042b15374bd93a26aae202591d9afd71e111dd0006b198", + "sha256:a7854326920d41c3b5d468154318fe6ba4390cb2410480976787c640707e0180", + "sha256:a89acae02b2975b1f8e4974cb8cdf9bf9f6c91162fb8dec50c259ce700f2770a", + "sha256:a8bbdb166e2fb816e43ab034c865147edafe28e1b19c72433147789ac83e2dda", + "sha256:ac786f6cb7aa10d44e9641c7a7d16d7f6e095b138795cd43503769d4154e0dc2", + "sha256:b09bc62e5193e31d7f9876220fb429ec13a6a181a24d897b9edfbbdbcd678851", + "sha256:b10556817f09d46d420edd982dd0653940b90151d0576f09143a8e773459f6fe", + "sha256:b81076552d34c27e5149a40187a8f7e2abb2d3185576a317aaf14aeeedad862a", + "sha256:bdfc54b4468ed4cd7415928cbe782f4d782722a81aeb0f81e2ddca9932632211", + "sha256:cf6e7d5fe2aeb54898df18db1baf479863eae581cce05410f61f6b4188c8ada1", + 
"sha256:cf98038d2abf63a1ada5730e91e84c642ba6c225b0198c3684151b1f80c5f8a6", + "sha256:d24a9e61df7a7787b338a58abfba975414937b609eb6b18973e25f573bc0eeeb", + "sha256:d74ee72b5071818a1a5dab47338e87f08a738cb938a3b0653b9e4d959ddd1fd9", + "sha256:dd16302d39c4d6f4afde80edd0c97d4db643327d355a312762ccd9bd2ca515ed", + "sha256:dd2fb11922f58df8528adfca123f6a84748ad17d066007e7ac977720063556bd", + "sha256:deac4bdafa19bbb89edfb73b19f7f69a52d0b5bd3bb0c4ad404c1bbfd7b4b7fd", + "sha256:e03c3b8cc7883a54c3f34a6a135c4a17bc9088a33f36796acdb47162791b02f6", + "sha256:e1ec8a9ee0987d4524ffd6299e778c16cc35fef6d1a2764e609f90962f0b293a", + "sha256:e8603e691580487f11306ecb066c76f1f4a8b54fb3bdb23fa40643a059509366", + "sha256:f444762fed1bc1fd75187ef14a20ed900c1fbb245d45be9e834b822a0223bc81", + "sha256:f63600ec06982cdf480899026f4fda622776f5fabed9a869fdb32d72bc17e99a", + "sha256:fb62d517a516128bacf08cb6a86ecd39fb06d08e7c4980251f5d5601d29989ba" ], "markers": "python_version >= '2.5' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==3.17.2" + "version": "==3.17.6" }, "six": { "hashes": [ - "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", - "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==1.15.0" + "version": "==1.16.0" }, "snuggs": { "hashes": [ @@ -532,6 +690,14 @@ ], "version": "==1.4.7" }, + "tomli": { + "hashes": [ + "sha256:c6ce0015eb38820eaf32b5db832dbc26deb3dd427bd5f6556cf0acac2c214fee", + "sha256:f04066f68f5554911363063a30b108d2b5a5b1a010aa8b6132af78489fe3aade" + ], + "markers": "python_version >= '3.6'", + "version": "==1.2.2" + }, "tqdm": { "hashes": [ "sha256:6baa75a88582b1db6d34ce4690da5501d2a1cb65c34664840a456b2c9f794d29", @@ -551,41 +717,43 @@ }, "decorator": { "hashes": [ - "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760", - "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7" + "sha256:7b12e7c3c6ab203a29e157335e9122cb03de9ab7264b137594103fd4a683b374", + "sha256:e59913af105b9860aa2c8d3272d9de5a56a4e608db9a2f167a8480b323d529a7" ], - "version": "==4.4.2" + "markers": "python_version >= '3.5'", + "version": "==5.1.0" }, "ipython": { "hashes": [ - "sha256:2e22c1f74477b5106a6fb301c342ab8c64bb75d702e350f05a649e8cb40a0fb8", - "sha256:a331e78086001931de9424940699691ad49dfb457cea31f5471eae7b78222d5e" + "sha256:4f69d7423a5a1972f6347ff233e38bbf4df6a150ef20fbb00c635442ac3060aa", + "sha256:a658beaf856ce46bc453366d5dc6b2ddc6c481efd3540cb28aa3943819caac9f" ], "index": "pypi", - "version": "==7.18.1" + "version": "==7.29.0" }, - "ipython-genutils": { + "jedi": { "hashes": [ - "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8", - "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8" + "sha256:637c9635fcf47945ceb91cd7f320234a7be540ded6f3e99a50cb6febdfd1ba8d", + "sha256:74137626a64a99c8eb6ae5832d99b3bdd7d29a3850fe2aa80a4126b2a7d949ab" ], - "version": "==0.2.0" + "markers": "python_version >= '3.6'", + "version": "==0.18.1" }, - "jedi": { + "matplotlib-inline": { "hashes": [ - "sha256:86ed7d9b750603e4ba582ea8edc678657fb4007894a12bcf6f4bb97892f31d20", - "sha256:98cc583fa0f2f8304968199b01b6b4b94f469a1f4a74c1560506ca2a211378b5" + "sha256:a04bfba22e0d1395479f866853ec1ee28eea1485c1d69a6faf00dc3e24ff34ee", + 
"sha256:aed605ba3b72462d64d475a21a9296f400a19c4f74a31b59103d2a99ffd5aa5c" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==0.17.2" + "markers": "python_version >= '3.5'", + "version": "==0.1.3" }, "parso": { "hashes": [ - "sha256:97218d9159b2520ff45eb78028ba8b50d2bc61dcc062a9682666f2dc4bd331ea", - "sha256:caba44724b994a8a5e086460bb212abc5a8bc46951bf4a9a1210745953622eb9" + "sha256:12b83492c6239ce32ff5eed6d3639d6a536170723c6f3f1506869f1ace413398", + "sha256:a8c4922db71e4fdb90e0d0bc6e50f9b273d3397925e5e60a717e719201778d22" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==0.7.1" + "markers": "python_version >= '3.6'", + "version": "==0.8.2" }, "pexpect": { "hashes": [ @@ -604,34 +772,34 @@ }, "prompt-toolkit": { "hashes": [ - "sha256:25c95d2ac813909f813c93fde734b6e44406d1477a9faef7c915ff37d39c0a8c", - "sha256:7debb9a521e0b1ee7d2fe96ee4bd60ef03c6492784de0547337ca4433e46aa63" + "sha256:449f333dd120bd01f5d296a8ce1452114ba3a71fae7288d2f0ae2c918764fa72", + "sha256:48d85cdca8b6c4f16480c7ce03fd193666b62b0a21667ca56b4bb5ad679d1170" ], - "markers": "python_full_version >= '3.6.1'", - "version": "==3.0.8" + "markers": "python_full_version >= '3.6.2'", + "version": "==3.0.22" }, "ptyprocess": { "hashes": [ - "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0", - "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f" + "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", + "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" ], - "version": "==0.6.0" + "version": "==0.7.0" }, "pygments": { "hashes": [ - "sha256:307543fe65c0947b126e83dd5a61bd8acbd84abec11f43caebaf5534cbc17998", - "sha256:926c3f319eda178d1bd90851e4317e6d8cdb5e292a3386aac9bd75eca29cf9c7" + "sha256:b8e67fe6af78f492b3c4b3e2970c0624cbf08beb1e493b2c99b9fa1b67a20380", + "sha256:f398865f7eb6874156579fdf36bc840a03cab64d1cde9e93d68f46a425ec52c6" ], "markers": "python_version >= '3.5'", - "version": "==2.7.1" + "version": "==2.10.0" }, "traitlets": { "hashes": [ - "sha256:178f4ce988f69189f7e523337a3e11d91c786ded9360174a3d9ca83e79bc5396", - "sha256:69ff3f9d5351f31a7ad80443c2674b7099df13cc41fc5fa6e2f6d3b0330b0426" + "sha256:059f456c5a7c1c82b98c2e8c799f39c9b8128f6d0d46941ee118daace9eb70c7", + "sha256:2d313cc50a42cd6c277e7d7dc8d4d7fedd06a2c215f78766ae7b1a66277e0033" ], "markers": "python_version >= '3.7'", - "version": "==5.0.5" + "version": "==5.1.1" }, "wcwidth": { "hashes": [ diff --git a/README.md b/README.md index 63c2a27f2..24cee3ce7 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,48 @@ -### Cahaba: Flood Inundation Mapping for U.S. National Water Model +## Cahaba: Flood Inundation Mapping for U.S. National Water Model -Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Weather Service (NWS). Software enables inundation mapping capability by generating Relative Elevation Models (REMs) and Synthetic Rating Curves (SRCs). Included are tests to evaluate skill and computational efficiency as well as functions to generate inundation maps. +Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Water Center (NWC). 
-## Dependencies +This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs), Synthetic Rating Curves (SRCs), and catchment grids. This repository also includes functionality to generate flood inundation maps (FIMs) and evaluate FIM accuracy. -[Docker](https://docs.docker.com/get-docker/) +#### For more information, see the [Cahaba Wiki](https://github.com/NOAA-OWP/cahaba/wiki). + +## Accessing Data through ESIP S3 Bucket +The latest nationally generated HAND data and a subset of the inputs can be found in an Amazon S3 Bucket hosted by [Earth Science Information Partners (ESIP)](https://www.esipfed.org/). These data can be accessed using the AWS CLI tools. + +AWS Region: `US East (N. Virginia) us-east-1` + +AWS Resource Name: `arn:aws:s3:::noaa-nws-owp-fim` + +### Configuring the AWS CLI + +1. [Install AWS CLI tools](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) -## Installation +2. [Configure AWS CLI tools](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html) + +### Accessing Data using the AWS CLI + +This S3 Bucket (`s3://noaa-nws-owp-fim`) is set up as a "Requester Pays" bucket. Read more about what that means [here](https://docs.aws.amazon.com/AmazonS3/latest/userguide/RequesterPaysBuckets.html). If you are using compute resources in the same region as the S3 Bucket, then there is no cost. + +#### Examples + +List bucket folder structure: +``` +aws s3 ls s3://noaa-nws-owp-fim/ --request-payer requester +``` +Download a directory of outputs for a HUC8: +``` +aws s3 cp --recursive s3://noaa-nws-owp-fim/hand_fim/fim_3_0_21_0/outputs/fr/12090301 12090301 --request-payer requester +``` + +## Running the Code +### Input Data +Input data can be found on the ESIP S3 Bucket (see "Accessing Data through ESIP S3 Bucket" section above). All necessary non-publicly available files are in this S3 bucket, as well as sample input data for HUCs 1204 and 1209. + +### Dependencies +[Docker](https://docs.docker.com/get-docker/) + +### Installation 1. Install Docker : [Docker](https://docs.docker.com/get-docker/) 2. Build Docker Image : `docker build -f Dockerfile.dev -t : ` 3. Create FIM group on host machine: @@ -15,64 +50,54 @@ Flood inundation mapping software configured to work with the U.S. National Wate 4. Change group ownership of repo (needs to be redone when a new file occurs in the repo): - Linux: `chgrp -R fim ` -## Configuration - -Software is configurable via parameters found in config directory. Copy files before editing and remove "template" pattern from the filename. +### Configuration +This software is configurable via parameters found in the `config` directory. Copy files before editing and remove the "template" pattern from the filename. Make sure to set the config folder group to 'fim' recursively using the chown command. Each development version will include a calibrated parameter set of Manning’s n values.
-- params_template.env -- mannings_default.json - - must change filepath in params_template.env under "manning_n" variable name -- params_calibrated.env - - runs calibrated mannings parameters from mannings_calibrated.json - -## Input Data - -The following input data sources should be downloaded and preprocessed prior to executing the preprocessing & hydrofabric generation code: -USACE National Levee Database: --Access here: https://levees.sec.usace.army.mil/ --Recommend downloading the “Full GeoJSON” file for the area of interest --Unzip data and then use the preprocessing scripts to filter data and fix geometries where needed -AHPs site locations for MS extent (currently not available to public) -NHDPlus HR datasets --Acquire_and_preprocess_inputs.py --aggregate_nhd_hr_streams.py -NWM Hydrofabric --nwm_flows.gpkg (currently not available to public) --nwm_catchments.gpkg (currently not available to public) --nwm_lakes.gpkg (currently not available to public) --nwm_headwaters.gpkg - derived - -NOTE: Some of the input data is not easy to acquire and will need to be shared with outside users. We are currently working on providing this functionality and should be available soon. - -## Usage - -1. Run Docker Container : `docker run --rm -it -v :/data -v :/foss_fim :` -2. Acquire and Prepare Data : `/foss_fim/lib/acquire_and_preprocess_inputs.py -u ` - - `-u` can be a single HUC4, series of HUC4s (e.g. 1209 1210), path to line-delimited file with HUC4s. - - Please run `/foss_fim/lib/acquire_and_preprocess_inputs.py --help` for more information. - - See United States Geological Survey (USGS) National Hydrography Dataset Plus High Resolution (NHDPlusHR) [site](https://www.usgs.gov/core-science-systems/ngp/national-hydrography/nhdplus-high-resolution) for more information -3. Aggregate NHD HR streams and create NWM headwater points : /foss_fim/lib/aggregate_vector_inputs.py -4. Produce Hydrofabric : `fim_run.sh -u -c /foss_fim/config/ -n ` - - `-u` can be a single huc, a series passed in quotes, or a line-delimited file - i. To run entire domain of available data use one of the `/data/inputs/included_huc[4,6,8].lst` files - - Outputs can be found under `/data/outputs/` - -## Evaluate FIM output to a Benchmark Dataset -Once the hydrofabric has been generated from fim_run.sh for, evaluation against a benchmark dataset can be performed using binary contingency statistics. One benchmark dataset that can be used for evaluations are Base Level Engineering studies available on the FEMA Base Flood Elevation Viewer. To acquire FEMA datasets go to the FEMA Base Flood Elevation Viewer (https://webapps.usgs.gov/infrm/estbfe/) and download the file geodatabase and depth grids for a HUC. To perform an evaluation a flow forecast file is required and benchmark grids are preprocessed prior to running run_test_case.py. - -1. Flow Forecast File Creation -`/foss_fim/tests/preprocess/create_flow_forecast_file.py -b -n -o -xs -hu -huid -l -f ` -For example, if HUC 12090301 were downloaded from the FEMA BFE viewer the geodatabase, “BLE_LowColoradoCummins.gdb”, contains a HUC Layer “S_HUC_Ar” (-hu) and a cross section layer “XS” (-xs). The HUC ID corresponds to the “HUC_CODE” field (-huid) within the “S_HUC_AR” layer. Additionally, the National Water Model geodatabase (-n) will be required with the stream layer (-l) along with the ID field (-f) in the attribute table. Instructions on how to obtain the National Water Model GIS layers will be forthcoming. - -2. 
Process benchmark grid data -`/foss_fim/tests/preprocess/preprocess_benchmark.py -b -r -o ` -For HUC 12090301, the benchmark datasets (-b) are the 100 year (“BLE_DEP01PCT”) and 500 year (“BLE_DEP0_2PCT”) depth grids converted to Geotiff format. An example of a reference dataset (-r) is the “rem_zeroed_masked.tif” produced as part of the hydrofabric from fim_run.sh. The output raster name (if doing ble data) should be `ble_huc__depth_.tif` where event is '100yr' or '500yr'. Once the flow file and benchmark grids are processed, the output files are then placed in this folder (from inside a Docker container): -`/foss_fim/tests_cases/validation_data_ble///` where event is ‘100yr’ or ‘500yr’ - -3. Run hydrologic evaluation (from inside Docker container): `/foss_fim/tests/run_test_case.py -r -b -t ` - - More information can be found by running `/foss_fim/tests/run_test_case.py --help` - -## Dependencies +- `params_template.env` +- `mannings_default.json` + - must change filepath in `params_template.env` in `manning_n` variable name +- `params_calibrated.env` + - runs calibrated mannings parameters from `mannings_calibrated.json` + +### Produce HAND Hydrofabric +``` +fim_run.sh -u -c /foss_fim/config/ -n +``` +- `-u` can be a single huc, a series passed in quotes, or a line-delimited file + i. To run entire domain of available data use one of the ```/data/inputs/included_huc[4,6,8].lst``` files +- Outputs can be found under ```/data/outputs/``` + +### Testing in Other HUCs +To test in HUCs other than the provided HUCs, the following processes can be followed to acquire and preprocess additional NHDPlus rasters and vectors. After these steps are run, the "Produce HAND Hydrofabric" step can be run for the new HUCs. + +``` +/foss_fim/src/acquire_and_preprocess_inputs.py -u +``` +- `-u` can be a single HUC4, series of HUC4s (e.g. 1209 1210), path to line-delimited file with HUC4s. +- Please run `/foss_fim/src/acquire_and_preprocess_inputs.py --help` for more information. +- See United States Geological Survey (USGS) National Hydrography Dataset Plus High Resolution (NHDPlusHR) [site](https://www.usgs.gov/core-science-systems/ngp/national-hydrography/nhdplus-high-resolution) for more information + +#### Reproject NHDPlus High-Res Rasters and Convert to Meters. +``` +/foss_fim/src/preprocess_rasters.py +``` + +---- +### Evaluating Inundation Map Performance +After `fim_run.sh` completes, you can evaluate the model's skill. The evaluation benchmark datasets are available through ESIP in the `test_cases` directory. + +To evaluate model skill, run the following: +``` +python /foss_fim/tools/synthesize_test_cases.py -c DEV -v -m -j [num_of_jobs] +``` + +More information can be found by running: +``` +python /foss_fim/tools/synthesize_test_cases.py --help +``` + +---- +### Managing Dependencies Dependencies are managed via [Pipenv](https://pipenv.pypa.io/en/latest/). To add new dependencies, from the projects's top-level directory: @@ -90,30 +115,28 @@ and include both `Pipfile` and `Pipfile.lock` in your commits. The docker image If you are on a machine that has a particularly slow internet connection, you may need to increase the timeout of pipenv. To do this simply add `PIPENV_INSTALL_TIMEOUT=10000000` in front of any of your pipenv commands. +---- +### Known Issues & Getting Help -## Known Issues & Getting Help - -Please see the issue tracker on GitHub for known issues and for getting help. 
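The evaluation step described above runs `synthesize_test_cases.py` once per FIM version with the flags listed in the README (`-c DEV -v -m -j`). Purely as an illustration, a short wrapper can drive that same documented call for several versions; the wrapper itself, the version list, the metrics path, and the job count below are placeholders, not part of the repository:
```
# Hypothetical convenience wrapper around the documented synthesize_test_cases.py call.
# The version list, metrics CSV path, and job count are example values only.
import subprocess

fim_versions = ["fim_3_0_21_0"]                 # example version string seen in the README
metrics_csv = "/data/metrics/dev_metrics.csv"   # placeholder output path for the metrics CSV

for version in fim_versions:
    cmd = [
        "python", "/foss_fim/tools/synthesize_test_cases.py",
        "-c", "DEV",          # configuration flag as shown in the README
        "-v", version,        # FIM version to evaluate
        "-m", metrics_csv,    # where to write the metrics CSV
        "-j", "6",            # number of parallel jobs (example value)
    ]
    subprocess.run(cmd, check=True)
```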
- -## Getting Involved +Please see the issue tracker on GitHub and the [Cahaba Wiki](https://github.com/NOAA-OWP/cahaba/wiki/Cahaba-Wiki-Home) for known issues and getting help. -NOAA's National Water Center welcomes anyone to contribute to the Cahaba repository to improve flood inundation mapping capabilities. Please contact Fernando Aristizabal (fernando.aristizabal@noaa.gov) or Fernando Salas (fernando.salas@noaa.gov) to get started. +### Getting Involved ----- +NOAA's National Water Center welcomes anyone to contribute to the Cahaba repository to improve flood inundation mapping capabilities. Please contact Brad Bates (bradford.bates@noaa.gov) or Fernando Salas (fernando.salas@noaa.gov) to get started. -## Open Source Licensing Info -1. [TERMS](TERMS.md) +### Open Source Licensing Info +1. [TERMS](docs/TERMS.md) 2. [LICENSE](LICENSE) ----- - -## Credits and References -1. Office of Water Prediction [(OWP)](https://water.noaa.gov/) -2. National Flood Interoperability Experiment [(NFIE)](https://web.corral.tacc.utexas.edu/nfiedata/) -3. Garousi‐Nejad, I., Tarboton, D. G.,Aboutalebi, M., & Torres‐Rua, A.(2019). Terrain analysis enhancements to the Height Above Nearest Drainage flood inundation mapping method. Water Resources Research, 55 , 7983–8009. https://doi.org/10.1029/2019WR0248375. -4. Zheng, X., D.G. Tarboton, D.R. Maidment, Y.Y. Liu, and P. Passalacqua. 2018. “River Channel Geometry and Rating Curve Estimation Using Height above the Nearest Drainage.” Journal of the American Water Resources Association 54 (4): 785–806. https://doi.org/10.1111/1752-1688.12661. -5. Barnes, Richard. 2016. RichDEM: Terrain Analysis Software. http://github.com/r-barnes/richdem -6. [TauDEM](https://github.com/dtarb/TauDEM) -7. Federal Emergency Management Agency (FEMA) Base Level Engineering [(BLE)](https://webapps.usgs.gov/infrm/estBFE/) -8. Verdin, James; Verdin, Kristine; Mathis, Melissa; Magadzire, Tamuka; Kabuchanga, Eric; Woodbury, Mark; and Gadain, Hussein, 2016, A software tool for rapid flood inundation mapping: U.S. Geological Survey Open-File Report 2016–1038, 26 p., http://dx.doi.org/10.3133/ofr20161038. -9. United States Geological Survey (USGS) National Hydrography Dataset Plus High Resolution (NHDPlusHR). https://www.usgs.gov/core-science-systems/ngp/national-hydrography/nhdplus-high-resolution +### Credits and References +1. [Office of Water Prediction (OWP)](https://water.noaa.gov/) +2. [National Flood Interoperability Experiment(NFIE)](https://web.corral.tacc.utexas.edu/nfiedata/) +3. Garousi‐Nejad, I., Tarboton, D. G.,Aboutalebi, M., & Torres‐Rua, A.(2019). Terrain analysis enhancements to the Height Above Nearest Drainage flood inundation mapping method. Water Resources Research, 55 , 7983–8009. +4. [Zheng, X., D.G. Tarboton, D.R. Maidment, Y.Y. Liu, and P. Passalacqua. 2018. “River Channel Geometry and Rating Curve Estimation Using Height above the Nearest Drainage.” Journal of the American Water Resources Association 54 (4): 785–806.](https://doi.org/10.1111/1752-1688.12661) +5. [Liu, Y. Y., D. R. Maidment, D. G. Tarboton, X. Zheng and S. Wang, (2018), "A CyberGIS Integration and Computation Framework for High-Resolution Continental-Scale Flood Inundation Mapping," JAWRA Journal of the American Water Resources Association, 54(4): 770-784.](https://doi.org/10.1111/1752-1688.12660) +6. [Barnes, Richard. 2016. RichDEM: Terrain Analysis Software](http://github.com/r-barnes/richdem) +7. [TauDEM](https://github.com/dtarb/TauDEM) +8. 
[Federal Emergency Management Agency (FEMA) Base Level Engineering (BLE)](https://webapps.usgs.gov/infrm/estBFE/) +9. [Verdin, James; Verdin, Kristine; Mathis, Melissa; Magadzire, Tamuka; Kabuchanga, Eric; Woodbury, Mark; and Gadain, Hussein, 2016, A software tool for rapid flood inundation mapping: U.S. Geological Survey Open-File Report 2016–1038, 26](http://dx.doi.org/10.3133/ofr20161038) +10. [United States Geological Survey (USGS) National Hydrography Dataset Plus High Resolution (NHDPlusHR)](https://www.usgs.gov/core-science-systems/ngp/national-hydrography/nhdplus-high-resolution) +11. [Esri Arc Hydro](https://www.esri.com/library/fliers/pdfs/archydro.pdf) diff --git a/api/.gitignore b/api/.gitignore new file mode 100644 index 000000000..2eea525d8 --- /dev/null +++ b/api/.gitignore @@ -0,0 +1 @@ +.env \ No newline at end of file diff --git a/lib/__init__.py b/api/README.md similarity index 100% rename from lib/__init__.py rename to api/README.md diff --git a/api/frontend/.env-template b/api/frontend/.env-template new file mode 100644 index 000000000..afd955443 --- /dev/null +++ b/api/frontend/.env-template @@ -0,0 +1,2 @@ +DATA_PATH= +SOCKET_URL= \ No newline at end of file diff --git a/api/frontend/docker-compose-dev.yml b/api/frontend/docker-compose-dev.yml new file mode 100644 index 000000000..1f7b0ca2d --- /dev/null +++ b/api/frontend/docker-compose-dev.yml @@ -0,0 +1,52 @@ +version: '3.5' +services: + fim_frontend_gui: + image: fim_frontend_gui + build: + context: ./gui + container_name: fim_frontend_gui + restart: always + env_file: + - .env + expose: + - "5000" + networks: + - fim + volumes: + - ./gui/templates/:/opt/gui/templates/ + - ./gui/gui.py:/opt/gui/gui.py + fim_frontend_output_handler: + image: fim_frontend_output_handler + build: + context: ./output_handler + container_name: fim_frontend_output_handler + restart: always + external_links: + - fim_node_connector + env_file: + - .env + networks: + - fim + volumes: + - ${DATA_PATH}:/data/ + - ./output_handler/output_handler.py:/opt/output_handler/output_handler.py + fim_nginx: + image: nginx + container_name: fim_nginx + restart: always + depends_on: + - fim_frontend_gui + external_links: + - fim_node_connector + ports: + - "80:80" + volumes: + - ./nginx-dev.conf:/etc/nginx/nginx.conf:ro + networks: + - fim + command: [nginx] + +networks: + fim: + name: fim + \ No newline at end of file diff --git a/api/frontend/docker-compose-prod.yml b/api/frontend/docker-compose-prod.yml new file mode 100644 index 000000000..b408fe054 --- /dev/null +++ b/api/frontend/docker-compose-prod.yml @@ -0,0 +1,47 @@ +version: '3.5' +services: + fim_frontend_gui: + image: fim_frontend_gui + build: + context: ./gui + container_name: fim_frontend_gui + restart: always + env_file: + - .env + expose: + - "5000" + networks: + - fim + fim_frontend_output_handler: + image: fim_frontend_output_handler + build: + context: ./output_handler + container_name: fim_frontend_output_handler + restart: always + external_links: + - fim_node_connector + env_file: + - .env + networks: + - fim + volumes: + - ${DATA_PATH}:/data/ + fim_nginx: + image: nginx + container_name: fim_nginx + restart: always + depends_on: + - fim_frontend_gui + external_links: + - fim_node_connector + ports: + - "80:80" + volumes: + - ./nginx-prod.conf:/etc/nginx/nginx.conf:ro + networks: + - fim + command: [nginx] + +networks: + fim: + name: fim \ No newline at end of file diff --git a/api/frontend/gui/Dockerfile b/api/frontend/gui/Dockerfile new file mode 100644 index 
000000000..fb5d0750e --- /dev/null +++ b/api/frontend/gui/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.8.5-slim-buster + +ENV PYTHONUNBUFFERED 1 +RUN mkdir -p /opt/gui +WORKDIR /opt/gui + +COPY requirements.txt . +RUN pip install -r requirements.txt --no-cache-dir +COPY . /opt/gui + +EXPOSE 5000 + +RUN chmod +x /opt/gui/entrypoint.sh +ENTRYPOINT ["/opt/gui/entrypoint.sh"] diff --git a/api/frontend/gui/entrypoint.sh b/api/frontend/gui/entrypoint.sh new file mode 100755 index 000000000..fbf7d3145 --- /dev/null +++ b/api/frontend/gui/entrypoint.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +cd /opt/gui/ +echo "Starting Gunicorn" +exec gunicorn --bind 0.0.0.0:5000 --reload wsgi:app \ No newline at end of file diff --git a/api/frontend/gui/gui.py b/api/frontend/gui/gui.py new file mode 100644 index 000000000..456622d72 --- /dev/null +++ b/api/frontend/gui/gui.py @@ -0,0 +1,16 @@ +import os +from gevent import monkey +monkey.patch_all() + +from flask import Flask, render_template, request + +SOCKET_URL = os.environ.get('SOCKET_URL') + +app = Flask(__name__) + +@app.route('/') +def main(): + return render_template('index.html', socket_url=SOCKET_URL) + +if __name__ == '__main__': + app.run("0.0.0.0", port=5000) \ No newline at end of file diff --git a/api/frontend/gui/requirements.txt b/api/frontend/gui/requirements.txt new file mode 100644 index 000000000..a78a884a3 --- /dev/null +++ b/api/frontend/gui/requirements.txt @@ -0,0 +1,4 @@ +flask==1.1.2 +flask-socketio==5.0.0 +gevent==20.9.0 +gunicorn==20.0.4 \ No newline at end of file diff --git a/api/frontend/gui/templates/index.html b/api/frontend/gui/templates/index.html new file mode 100644 index 000000000..421050dbd --- /dev/null +++ b/api/frontend/gui/templates/index.html @@ -0,0 +1,560 @@ + + + Cahaba API + + + + + + + + +
+ [index.html body: markup stripped during extraction; recoverable content includes tabs for Release, FIM Run, Calibration, and Pre-processing; Basic, Configuration, and Extent option panels; a "Not Connected" status indicator; and a jobs table with Name, Time Elapsed, Status, and Outputs Saved columns]
+ + + + + + \ No newline at end of file diff --git a/api/frontend/gui/wsgi.py b/api/frontend/gui/wsgi.py new file mode 100644 index 000000000..b9303d13e --- /dev/null +++ b/api/frontend/gui/wsgi.py @@ -0,0 +1,4 @@ +from gui import app + +if __name__ == "__main__": + app.run() \ No newline at end of file diff --git a/api/frontend/nginx-dev.conf b/api/frontend/nginx-dev.conf new file mode 100644 index 000000000..97e37c1f0 --- /dev/null +++ b/api/frontend/nginx-dev.conf @@ -0,0 +1,72 @@ +user nginx; +worker_processes 1; +pid /var/run/nginx.pid; +daemon off; + +events { + worker_connections 512; + # multi_accept on; +} + +http { + sendfile on; + tcp_nopush on; + tcp_nodelay on; + proxy_connect_timeout 300; + proxy_send_timeout 300; + proxy_read_timeout 90m; + send_timeout 300; + keepalive_timeout 65; + types_hash_max_size 2048; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format upstream_time '$remote_addr - $remote_user [$time_local] ' + '"$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent"' + 'rt=$request_time uct="$upstream_connect_time" uht="$upstream_header_time" urt="$upstream_response_time"'; + + access_log /var/log/nginx/access.log upstream_time; + error_log /var/log/nginx/error.log warn; + + server { + listen 80; + client_header_buffer_size 64k; + large_client_header_buffers 4 64k; + server_name _; + root /var/www/; + + gzip on; + gzip_types application/json; + proxy_http_version 1.1; + + location /stats/nginx { + stub_status on; + } + + # Node side (these should only be used if the frontend is on the same machine as the connector) + location / { + proxy_pass http://fim_node_connector:6000/; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /socket.io { + proxy_http_version 1.1; + proxy_buffering off; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "Upgrade"; + proxy_pass http://fim_node_connector:6000/socket.io; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Frontend Side + location /api { + proxy_pass http://fim_frontend_gui:5000/; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + } + } +} diff --git a/api/frontend/nginx-prod.conf b/api/frontend/nginx-prod.conf new file mode 100644 index 000000000..878d423fc --- /dev/null +++ b/api/frontend/nginx-prod.conf @@ -0,0 +1,55 @@ +user nginx; +worker_processes 1; +pid /var/run/nginx.pid; +daemon off; + +events { + worker_connections 512; + # multi_accept on; +} + +http { + sendfile on; + tcp_nopush on; + tcp_nodelay on; + proxy_connect_timeout 300; + proxy_send_timeout 300; + proxy_read_timeout 90m; + send_timeout 300; + keepalive_timeout 65; + types_hash_max_size 2048; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format upstream_time '$remote_addr - $remote_user [$time_local] ' + '"$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent"' + 'rt=$request_time uct="$upstream_connect_time" uht="$upstream_header_time" urt="$upstream_response_time"'; + + access_log /var/log/nginx/access.log upstream_time; + error_log /var/log/nginx/error.log warn; + + server { + listen 80; + client_header_buffer_size 64k; + large_client_header_buffers 4 64k; + server_name _; + root /var/www/; + + gzip on; + gzip_types application/json; + proxy_http_version 1.1; + + location /stats/nginx { + stub_status on; + } + + # Frontend Side + location /api { + proxy_pass http://fim_frontend_gui:5000/; + proxy_set_header 
Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + } + } +} diff --git a/api/frontend/output_handler/Dockerfile b/api/frontend/output_handler/Dockerfile new file mode 100644 index 000000000..68498a6d3 --- /dev/null +++ b/api/frontend/output_handler/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.8.5-slim-buster + +ENV PYTHONUNBUFFERED 1 +RUN mkdir -p /opt/output_handler +WORKDIR /opt/output_handler + +COPY requirements.txt . +RUN pip install -r requirements.txt --no-cache-dir +COPY . /opt/output_handler + +RUN chmod +x /opt/output_handler/entrypoint.sh +ENTRYPOINT ["/opt/output_handler/entrypoint.sh"] diff --git a/api/frontend/output_handler/entrypoint.sh b/api/frontend/output_handler/entrypoint.sh new file mode 100755 index 000000000..248541b09 --- /dev/null +++ b/api/frontend/output_handler/entrypoint.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +umask 002 +cd /opt/output_handler/ +echo "Starting Output Handler" +python ./output_handler.py \ No newline at end of file diff --git a/api/frontend/output_handler/output_handler.py b/api/frontend/output_handler/output_handler.py new file mode 100644 index 000000000..1d803a696 --- /dev/null +++ b/api/frontend/output_handler/output_handler.py @@ -0,0 +1,46 @@ +import os +import time + +import socketio + +SOCKET_URL = os.environ.get('SOCKET_URL') + +def handle_outputs(data): + job_name = data['job_name'] + directory_path = data['directory_path'] + file_name = data['file_name'] + file_chunk = data['file_chunk'] + chunk_index = data['chunk_index'] + + # Create folder if it doesn't yet exist and set writing mode + mode = 'ab' + if chunk_index == 0: + mode = 'wb' + try: + os.makedirs(directory_path) + except: + pass + + # Write binary data to file + with open(f"{directory_path}/{file_name}", mode) as binary_file: + print(f"Writing chunk {chunk_index} for file {directory_path}/{file_name}") + binary_file.write(file_chunk) + + sio.emit('output_handler_finished_file_chunk', {'job_name': job_name, 'file_path': f"{directory_path}/{file_name}"}) + +sio = socketio.Client() + +@sio.event +def connect(): + print("Output Handler Connected!") + sio.emit('output_handler_connected') + +@sio.event +def disconnect(): + print('disconnected from server') + +@sio.on('new_job_outputs') +def ws_new_job_outputs(data): + handle_outputs(data) + +sio.connect(SOCKET_URL) \ No newline at end of file diff --git a/api/frontend/output_handler/requirements.txt b/api/frontend/output_handler/requirements.txt new file mode 100644 index 000000000..a2217b4e9 --- /dev/null +++ b/api/frontend/output_handler/requirements.txt @@ -0,0 +1,2 @@ +python-engineio[client]==4.0.0 +python-socketio[client]==5.0.3 \ No newline at end of file diff --git a/api/node/.env-template b/api/node/.env-template new file mode 100644 index 000000000..352f2d2ca --- /dev/null +++ b/api/node/.env-template @@ -0,0 +1,6 @@ +DATA_PATH= +DOCKER_IMAGE_PATH= +SOCKET_URL= +FRONTEND_URL= +GITHUB_REPO=https://github.com/NOAA-OWP/cahaba.git +MAX_ALLOWED_CPU_CORES= diff --git a/api/node/connector/Dockerfile b/api/node/connector/Dockerfile new file mode 100644 index 000000000..091fdb364 --- /dev/null +++ b/api/node/connector/Dockerfile @@ -0,0 +1,16 @@ +FROM docker:20.10.2-dind + +RUN apk add --no-cache python3 python3-dev py3-pip build-base openssl-dev libffi-dev git + +ENV PYTHONUNBUFFERED 1 +RUN mkdir -p /opt/connector +WORKDIR /opt/connector + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --no-cache-dir +COPY . 
/opt/connector + +EXPOSE 6000 + +RUN chmod +x /opt/connector/entrypoint.sh +ENTRYPOINT ["/opt/connector/entrypoint.sh"] diff --git a/api/node/connector/connector.py b/api/node/connector/connector.py new file mode 100644 index 000000000..117448d4b --- /dev/null +++ b/api/node/connector/connector.py @@ -0,0 +1,227 @@ +import eventlet +eventlet.monkey_patch() + +import os +import re +import time +import random +import logging +import subprocess +from datetime import date + +from flask import Flask, request +from flask_socketio import SocketIO, emit + +DATA_PATH = os.environ.get('DATA_PATH') +DOCKER_IMAGE_PATH = os.environ.get('DOCKER_IMAGE_PATH') +SOCKET_URL = os.environ.get('SOCKET_URL') +FRONTEND_URL = os.environ.get('FRONTEND_URL') +GITHUB_REPO = os.environ.get('GITHUB_REPO') + +app = Flask(__name__) +socketio = SocketIO(app, cors_allowed_origins=[SOCKET_URL, FRONTEND_URL, "http://fim_node_connector:6000"]) + +shared_data = { + 'handler_sid': None, + 'updater_sid': None +} + +@app.route('/') +def main(): + return '
Nothing to see here....
' + +@socketio.on('connect') +def ws_conn(): + print('user connected!') + emit('is_connected', True) + +@socketio.on('disconnect') +def ws_disconn(): + print('user disconnected!') + emit('is_connected', False) + +@socketio.on('update') +def ws_update(data): + emit('client_update', data, broadcast=True) + +@socketio.on('output_handler_connected') +def ws_output_handler_connected(): + print('handler_sid: ', request.sid) + shared_data['handler_sid'] = request.sid + emit('retry_saving_files', room=shared_data['updater_sid']) + +@socketio.on('updater_connected') +def ws_updater_connected(): + print('updater_sid: ', request.sid) + shared_data['updater_sid'] = request.sid + emit('retry_saving_files', room=shared_data['updater_sid']) + +@socketio.on('ready_for_output_handler') +def ws_ready_for_output_handler(data): + job_name = data['job_name'] + path = data['path'] + chunk_index = data['chunk_index'] + + if shared_data['handler_sid'] == None: + print("output handler not connected!") + emit('retry_saving_files') + return + + # Split up path into parts for the output handler + path_parts = re.search(rf"(.+)/(.+)", path) + directory_path = path_parts.group(1) + file_name = path_parts.group(2) + + file_read_start = time.time() + with open(path, "rb") as binary_file: + # Read and emit file chunk by chunk (50MB at a time) + binary_file.seek(chunk_index * 52428800) + file_chunk = binary_file.read(52428800) + + if len(file_chunk) == 0: + print('End of File') + emit('file_saved', { + 'job_name': job_name, + 'file_path': path + }, room=shared_data['updater_sid']) + return + + print("Sending to output handler", path, "Chunk:", chunk_index) + emit('new_job_outputs', { + 'job_name': job_name, + 'directory_path': directory_path, + 'file_name': file_name, + 'file_chunk': file_chunk, + 'chunk_index': chunk_index + }, room=shared_data['handler_sid']) + +@socketio.on('output_handler_finished_file_chunk') +def output_handler_finished_file_chunk(data): + job_name = data['job_name'] + file_path = data['file_path'] + + print('done saving chunk', job_name, file_path) + emit('file_chunk_saved', { + 'job_name': job_name, + 'file_path': file_path, + }, room=shared_data['updater_sid']) + +@socketio.on('new_job') +def ws_new_job(job_params): + job_type = job_params['job_type'] + + if job_type == 'fim_run': + validation_errors = [] + + # Get Preset Option + preset = job_params['preset'] + + # Validate Hucs Name Option + if preset == 'custom': + hucs_raw = job_params['hucs'].replace(',', ' ').split() + parallel_jobs = len(hucs_raw) + hucs_type = len(hucs_raw[0]) + hucs = ' '.join(hucs_raw) + invalid_hucs = re.search('[^0-9 ]', hucs) + if invalid_hucs: validation_errors.append('Invalid Huc(s)') + else: + hucs = f"/data/inputs/huc_lists/{preset}" + parallel_jobs = 0 + hucs_type = 0 + + # Validate Git Branch Option + branch = '' + branch_exists = subprocess.run(['git', 'ls-remote', '--heads', GITHUB_REPO, job_params['git_branch'].replace(' ', '_')], stdout=subprocess.PIPE).stdout.decode('utf-8') + if branch_exists: branch = job_params['git_branch'].replace(' ', '_') + else: validation_errors.append('Git Branch Does Not Exist') + + # Validate Extent Option + valid_extents = ['FR', 'MS'] + extents = [] + for extent in job_params['extents']: + if extent in valid_extents: + extents.append(extent) + else: + validation_errors.append('Invalid Extent Option') + + # Validate Configuration Option + config_path = '' + if job_params['configuration'] == 'default': config_path = './foss_fim/config/params_template.env' + elif 
job_params['configuration'] == 'calibrated': config_path = './foss_fim/config/params_calibrated.env' + else: validation_errors.append('Invalid Configuration Option') + + # Validate Dev Run Option + if job_params['dev_run'] : dev_run = True + else: dev_run = False + + # Validate Viz Run Option + if job_params['viz_run'] : viz_run = True + else: viz_run = False + + if len(validation_errors) == 0: + for extent in extents: + # Validate Job Name Option + job_name = f"apijob_{job_params['job_name'].replace(' ', '_')[0:50]}_fim_run_{extent.lower()}{'_c' if job_params['configuration'] == 'calibrated' else ''}{'_v' if viz_run == True else ''}_apijob_{branch}_{date.today().strftime('%d%m%Y')}_{random.randint(0, 99999)}" + print(f"adding job {job_name} {branch} {preset} {hucs} {parallel_jobs} {hucs_type} {extent.lower()} {config_path} {dev_run} {viz_run}") + emit('add_job_to_queue', { + 'job_type': 'fim_run', + 'job_name': job_name, + 'branch': branch, + 'hucs': hucs, + 'parallel_jobs': parallel_jobs, + 'hucs_type': hucs_type, + 'extent': extent, + 'config_path': config_path, + 'dev_run': dev_run, + 'viz_run': viz_run, + }, room=shared_data['updater_sid']) + print('fim_run job added') + emit('job_added', 'fim_run') + else: + emit('validation_errors', validation_errors) + + elif job_type == 'release': + job_version_major = job_params['job_version_major'] + job_version_minor = job_params['job_version_minor'] + job_version_patch = job_params['job_version_patch'] + + # TODO: validate version number + + job_name_base = f"fim_3_{job_version_major}_{job_version_minor}_{job_version_patch}" + + prev_job_version_major = job_params['prev_job_version_major'] + prev_job_version_minor = job_params['prev_job_version_minor'] + prev_job_version_patch = job_params['prev_job_version_patch'] + + prev_version_base = f"fim_3_{prev_job_version_major}_{prev_job_version_minor}_{prev_job_version_patch}" + + huc_lists = ['/data/inputs/huc_lists/included_huc8.lst', '/data/inputs/huc_lists/included_huc8_ms.lst'] + extents = ['FR', 'MS'] + + for hucs, extent in zip(huc_lists, extents): + # Validate Job Name Option + prev_version = f"{prev_version_base}_{extent.lower()}_c" + job_name = f"apijob_{job_name_base}_{extent.lower()}_c_apijob_dev_{date.today().strftime('%d%m%Y')}_{random.randint(0, 99999)}" + print(f"adding job {job_name} {hucs} {extent.lower()}") + emit('add_job_to_queue', { + 'job_type': 'release', + 'job_name': job_name, + 'hucs': hucs, + 'extent': extent, + 'previous_major_fim_version': prev_version + }, room=shared_data['updater_sid']) + print('release job added') + emit('job_added', 'release') + + @socketio.on('cancel_job') + def ws_cancel_job(job_params): + # Validate Job Name Option + job_name = job_params['job_name'] + + emit('remove_job_from_queue', {'job_name': job_name}, room=shared_data['updater_sid']) + print('job canceled') + emit('job_canceled', 'fim_run') + + +if __name__ == '__main__': + socketio.run(app, host="0.0.0.0", port="6000") diff --git a/api/node/connector/entrypoint.sh b/api/node/connector/entrypoint.sh new file mode 100755 index 000000000..d6d853d6a --- /dev/null +++ b/api/node/connector/entrypoint.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +cd /opt/connector/ +echo "Starting Connector" +python3 ./connector.py diff --git a/api/node/connector/requirements.txt b/api/node/connector/requirements.txt new file mode 100644 index 000000000..e0b0b19de --- /dev/null +++ b/api/node/connector/requirements.txt @@ -0,0 +1,3 @@ +flask==1.1.2 +flask-socketio==5.0.0 +eventlet==0.31.0 diff --git 
a/api/node/docker-compose-dev.yml b/api/node/docker-compose-dev.yml new file mode 100644 index 000000000..9823afad9 --- /dev/null +++ b/api/node/docker-compose-dev.yml @@ -0,0 +1,38 @@ +version: '3.5' +services: + fim_node_connector: + image: fim_node_connector + build: + context: ./connector + container_name: fim_node_connector + env_file: + - .env + restart: always + expose: + - "6000" + networks: + fim: + aliases: + - fimnodeconnector + volumes: + - ${DATA_PATH}:/data/ + - /var/run/docker.sock:/var/run/docker.sock + - ./connector/connector.py:/opt/connector/connector.py + fim_node_updater: + image: fim_node_updater + build: + context: ./updater + container_name: fim_node_updater + restart: always + depends_on: + - fim_node_connector + networks: + - fim + volumes: + - ${DATA_PATH}:/data/ + - /var/run/docker.sock:/var/run/docker.sock + - ./updater/updater.py:/opt/updater/updater.py +networks: + fim: + name: fim + \ No newline at end of file diff --git a/api/node/docker-compose-prod.yml b/api/node/docker-compose-prod.yml new file mode 100644 index 000000000..f9787ab59 --- /dev/null +++ b/api/node/docker-compose-prod.yml @@ -0,0 +1,53 @@ +version: '3.5' +services: + fim_node_connector: + image: fim_node_connector + build: + context: ./connector + container_name: fim_node_connector + env_file: + - .env + restart: always + expose: + - "6000" + networks: + fim: + aliases: + - fimnodeconnector + volumes: + - ${DATA_PATH}:/data/ + - /var/run/docker.sock:/var/run/docker.sock + fim_node_updater: + image: fim_node_updater + build: + context: ./updater + container_name: fim_node_updater + env_file: + - .env + restart: always + depends_on: + - fim_node_connector + networks: + - fim + volumes: + - ${DATA_PATH}:/data/ + - /var/run/docker.sock:/var/run/docker.sock + fim_nginx: + image: nginx + container_name: fim_nginx + restart: always + depends_on: + - fim_node_connector + ports: + - "80:80" + volumes: + - ./nginx.conf:/etc/nginx/nginx.conf:ro + networks: + - fim + command: [nginx] + +networks: + fim: + name: fim + + \ No newline at end of file diff --git a/api/node/nginx.conf b/api/node/nginx.conf new file mode 100644 index 000000000..c2fc935bd --- /dev/null +++ b/api/node/nginx.conf @@ -0,0 +1,64 @@ +user nginx; +worker_processes 1; +pid /var/run/nginx.pid; +daemon off; + +events { + worker_connections 512; + # multi_accept on; +} + +http { + sendfile on; + tcp_nopush on; + tcp_nodelay on; + proxy_connect_timeout 300; + proxy_send_timeout 300; + proxy_read_timeout 90m; + send_timeout 300; + keepalive_timeout 65; + types_hash_max_size 2048; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format upstream_time '$remote_addr - $remote_user [$time_local] ' + '"$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent"' + 'rt=$request_time uct="$upstream_connect_time" uht="$upstream_header_time" urt="$upstream_response_time"'; + + access_log /var/log/nginx/access.log upstream_time; + error_log /var/log/nginx/error.log warn; + + server { + listen 80; + client_header_buffer_size 64k; + large_client_header_buffers 4 64k; + server_name _; + root /var/www/; + + gzip on; + gzip_types application/json; + proxy_http_version 1.1; + + location /stats/nginx { + stub_status on; + } + + location / { + proxy_pass http://fim_node_connector:6000/; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /socket.io { + proxy_http_version 1.1; + proxy_buffering off; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header 
Connection "Upgrade"; + proxy_pass http://fim_node_connector:6000/socket.io; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + } + } +} \ No newline at end of file diff --git a/api/node/updater/Dockerfile b/api/node/updater/Dockerfile new file mode 100644 index 000000000..d62a77652 --- /dev/null +++ b/api/node/updater/Dockerfile @@ -0,0 +1,14 @@ +FROM docker:20.10.2-dind + +RUN apk add --no-cache python3 python3-dev py3-pip build-base openssl-dev libffi-dev git + +ENV PYTHONUNBUFFERED 1 +RUN mkdir -p /opt/updater +WORKDIR /opt/updater + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --no-cache-dir +COPY . /opt/updater + +RUN chmod +x /opt/updater/entrypoint.sh +ENTRYPOINT ["/opt/updater/entrypoint.sh"] diff --git a/api/node/updater/entrypoint.sh b/api/node/updater/entrypoint.sh new file mode 100755 index 000000000..0f74f2541 --- /dev/null +++ b/api/node/updater/entrypoint.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +cd /opt/updater/ +echo "Starting Update Loop" +python3 ./updater.py diff --git a/api/node/updater/requirements.txt b/api/node/updater/requirements.txt new file mode 100644 index 000000000..a2217b4e9 --- /dev/null +++ b/api/node/updater/requirements.txt @@ -0,0 +1,2 @@ +python-engineio[client]==4.0.0 +python-socketio[client]==5.0.3 \ No newline at end of file diff --git a/api/node/updater/updater.py b/api/node/updater/updater.py new file mode 100644 index 000000000..90fc0da71 --- /dev/null +++ b/api/node/updater/updater.py @@ -0,0 +1,530 @@ +import os +import re +import glob +import math +import time +import json +import shutil +import logging +import subprocess + +import socketio + +DATA_PATH = os.environ.get('DATA_PATH') +DOCKER_IMAGE_PATH = os.environ.get('DOCKER_IMAGE_PATH') +GITHUB_REPO = os.environ.get('GITHUB_REPO') +MAX_ALLOWED_CPU_CORES = int(os.environ.get('MAX_ALLOWED_CPU_CORES')) + +shared_data = { + 'connected': False, + 'current_saving_job': '' +} + +buffer_jobs = [] +buffer_remove_jobs = [] +current_jobs = {} +if os.path.exists('/data/outputs/current_jobs.json'): + with open('/data/outputs/current_jobs.json') as f: + current_jobs = json.load(f) + for job_name in current_jobs.keys(): + if 'is_actively_saving' in current_jobs[job_name] and current_jobs[job_name]['is_actively_saving'] == True: + shared_data['current_saving_job'] = current_jobs[job_name] + + +# Get all the current running jobs from the list of docker containers, store that data in a dictionary +# along with any other needed metadata (like if it's still running, doing post processing, copying outputs +# to its destination, etc), and then update the websocket server of the status of the jobs. 
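The comment above outlines the updater's design: poll Docker for `apijob` containers, keep per-job metadata in a dictionary, and push status updates to the connector over socket.io. A minimal sketch of that polling pattern follows; the helper names are hypothetical and none of the queueing, post-processing, or output-saving state handled by the real `update_loop` below is included:
```
# Minimal sketch of the polling pattern described above (hypothetical names only).
import subprocess
import time

import socketio

sio = socketio.Client()
# Assumes sio.connect('http://fim_node_connector:6000/') has been called first.

def poll_job_containers():
    """Return {container_name: state} for containers whose name matches 'apijob'."""
    out = subprocess.check_output(
        ["docker", "ps", "--all", "--filter=name=apijob",
         "--format", "{{.Names}} {{.State}}"],
        text=True)
    return dict(line.split() for line in out.splitlines())

def sketch_update_loop(poll_seconds=10):
    while True:
        states = poll_job_containers()
        # Broadcast the latest container states; the real loop also tracks
        # queueing, saving, and post-processing phases for each job.
        sio.emit("update", {"jobUpdates": [
            {"job_name": name, "container_state": state}
            for name, state in states.items()
        ]})
        time.sleep(poll_seconds)
```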
+def update_loop(): + while True: + # If there are no current jobs, just check every 10 seconds till there is + if len(current_jobs.keys()) == 0: sio.sleep(10) + + while len(buffer_jobs) > 0: + new_job = buffer_jobs.pop() + current_jobs[new_job['job_name']] = new_job + + while len(buffer_remove_jobs) > 0: + job_to_remove = buffer_remove_jobs.pop() + if job_to_remove['job_name'] in current_jobs: + current_jobs[job_to_remove['job_name']]['status'] = 'Cancelled' + + # Get list of current docker containers that are fim run jobs + # docker ps --all --filter=name=apijob --format='{{.Names}} {{.State}}' + containers_raw = os.popen("docker ps --all --filter=name=apijob --format='{{.Names}} {{.State}}'").read().splitlines() + containers_split = [ line.split() for line in containers_raw ] + container_states = { name: state for (name, state) in containers_split } + + jobs_to_delete = [] + for job_name in current_jobs.keys(): + sio.sleep(0) + if job_name in container_states: + current_jobs[job_name]['container_state'] = container_states[job_name] + + # If the user chooses to cancel the job early + if current_jobs[job_name]['status'] == 'Cancelled': + # If the docker container is running, stop and remove it + if current_jobs[job_name]['time_elapsed'] > 0 and current_jobs[job_name]['container_state'] != 'exited': + subprocess.call(f"docker container stop {job_name}", shell=True) + subprocess.call(f"docker container rm {job_name}", shell=True) + + print("output_handler finished, deleted temp source files and output files") + temp_path = f"/data/temp/{job_name}" + if os.path.isdir(temp_path): + shutil.rmtree(temp_path) + + outputs_path = f"/data/outputs/{current_jobs[job_name]['nice_name']}" + if os.path.isdir(outputs_path): + shutil.rmtree(outputs_path) + + jobs_to_delete.append(job_name) + + active_statuses = [ + 'In Progress', + 'Ready for Synthesize Test Cases', + 'Running Synthesize Test Cases', + 'Ready for Eval Plots', + 'Running Eval Plots', + 'Ready for Generate Categorical FIM', + 'Running Generate Categorical FIM', + 'Ready to Save File', + 'Saving File' + ] + # TODO: separate list for queuing so that one job can save and another run + + # Update the time elapsed for all jobs that are currently in progress or saving outputs + if current_jobs[job_name]['status'] in active_statuses: + current_jobs[job_name]['time_elapsed'] = math.ceil(time.time() - current_jobs[job_name]['time_started']) + + # TODO: While job is in progress, keep track of how many hucs are done and overall progress % + + # Once resources recome available, start a new job that is in queue + + if current_jobs[job_name]['status'] == 'In Queue': + current_jobs[job_name]['time_started'] = time.time() + + total_active_cores = 0 + for j in current_jobs.keys(): + if current_jobs[j]['status'] in active_statuses: + # This is to account for the fact that HUC6's take a lot more resources to run. 
+ # (not necessarily cpu cores but rather RAM, so this artificially reduces how many jobs can run when HUC6's + # are running) + # HACK: this is more of a temporary solution until we no longer need to run HUC6's + if current_jobs[j]['hucs_type'] == '6': + total_active_cores += current_jobs[j]['parallel_jobs'] * 5 + else: + total_active_cores += current_jobs[j]['parallel_jobs'] + + # Machine has enough resources to run a new job + potential_active_cores = 0 + if current_jobs[job_name]['hucs_type'] == '6': + potential_active_cores = current_jobs[job_name]['parallel_jobs'] * 5 + total_active_cores + else: + potential_active_cores = current_jobs[job_name]['parallel_jobs'] + total_active_cores + + # print(f"Checking whether a new job can start {potential_active_cores} <= {MAX_ALLOWED_CPU_CORES}") + # print(potential_active_cores <= MAX_ALLOWED_CPU_CORES) + if potential_active_cores <= MAX_ALLOWED_CPU_CORES: + job_name = current_jobs[job_name]['job_name'] + nice_name = current_jobs[job_name]['nice_name'] + branch = current_jobs[job_name]['branch'] + hucs = current_jobs[job_name]['hucs'] + parallel_jobs = current_jobs[job_name]['parallel_jobs'] + extent = current_jobs[job_name]['extent'] + config_path = current_jobs[job_name]['config_path'] + dev_run = current_jobs[job_name]['dev_run'] + viz_run = current_jobs[job_name]['viz_run'] + + # Clone github repo, with specific branch, to a temp folder + print(f'cd /data/temp && git clone -b {branch} {GITHUB_REPO} {job_name} && chmod -R 777 {job_name} && cp .env {job_name}/tools/.env') + subprocess.call(f'cd /data/temp && git clone -b {branch} {GITHUB_REPO} {job_name} && chmod -R 777 {job_name} && cp .env {job_name}/tools/.env', shell=True) + + # Kick off the new job as a docker container with the new cloned repo as the volume + print(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim {DOCKER_IMAGE_PATH} fim_run.sh -u \"{hucs}\" -e {extent} -c {config_path} -n {nice_name} -o {'' if dev_run else '-p'} {'-v' if viz_run else ''} -j {parallel_jobs}") + subprocess.call(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim {DOCKER_IMAGE_PATH} fim_run.sh -u \"{hucs}\" -e {extent} -c {config_path} -n {nice_name} -o {'' if dev_run else '-p'} {'-v' if viz_run else ''} -j {parallel_jobs}", shell=True) + current_jobs[job_name]['status'] = 'In Progress' + + # Once the Docker container is done, either save outputs or run release + if current_jobs[job_name]['status'] == 'In Progress' and current_jobs[job_name]['container_state'] == 'exited': + + # Get container exit code, get the docker log, and then remove container + exit_code_raw = os.popen(f"docker inspect {job_name}" + " --format='{{.State.ExitCode}}'").read().splitlines() + + print("Exit code") + print(exit_code_raw) + print(exit_code_raw[0]) + try: + print(int(exit_code_raw[0])) + except: + pass + + exit_code = int(exit_code_raw[0]) + current_jobs[job_name]['exit_code'] = exit_code + subprocess.call(f"docker logs {job_name} >& /data/outputs/{current_jobs[job_name]['nice_name']}/logs/docker.log", shell=True) + subprocess.call(f"docker container rm {job_name}", shell=True) + + if current_jobs[job_name]['job_type'] == 'fim_run': + for path, folders, files in os.walk(f"/data/outputs/{current_jobs[job_name]['nice_name']}"): + for file in files: + current_jobs[job_name]['output_files_saved'][os.path.join(path, file)] = 0 + + current_jobs[job_name]['total_output_files_length'] = 
len(current_jobs[job_name]['output_files_saved'].keys()) + current_jobs[job_name]['status'] = 'Ready to Save File' + elif current_jobs[job_name]['job_type'] == 'release': + # Move outputs to previous_fim and set them to be copied to the dev machine + if os.path.isdir(f"/data/previous_fim/{current_jobs[job_name]['nice_name']}"): + shutil.rmtree(f"/data/previous_fim/{current_jobs[job_name]['nice_name']}") + if os.path.isdir(f"/data/outputs/{current_jobs[job_name]['nice_name']}"): shutil.move(f"/data/outputs/{current_jobs[job_name]['nice_name']}", '/data/previous_fim') + for path, folders, files in os.walk(f"/data/previous_fim/{current_jobs[job_name]['nice_name']}"): + for file in files: + current_jobs[job_name]['output_files_saved'][os.path.join(path, file)] = 0 + current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys()) + current_jobs[job_name]['status'] = 'Ready for Synthesize Test Cases' + + if current_jobs[job_name]['status'] == 'Ready for Synthesize Test Cases': + job_name = current_jobs[job_name]['job_name'] + nice_name = current_jobs[job_name]['nice_name'] + parallel_jobs = current_jobs[job_name]['parallel_jobs'] + + # Kick off the new job as a docker container to run eval metrics + print(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/synthesize_test_cases.py -c PREV --fim-version {nice_name} --job-number {parallel_jobs} -m /data/test_cases/metrics_library/all_official_versions.csv") + subprocess.call(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/synthesize_test_cases.py -c PREV --fim-version {nice_name} --job-number {parallel_jobs} -m /data/test_cases/metrics_library/all_official_versions.csv", shell=True) + current_jobs[job_name]['container_state'] = 'running' + current_jobs[job_name]['status'] = 'Running Synthesize Test Cases' + + # Once the Docker container is done, save outputs + if current_jobs[job_name]['status'] == 'Running Synthesize Test Cases' and current_jobs[job_name]['container_state'] == 'exited': + # Get container exit code, get the docker log, and then remove container + exit_code_raw = os.popen(f"docker inspect {job_name}" + " --format='{{.State.ExitCode}}'").read().splitlines() + + print("Exit code") + print(exit_code_raw) + print(exit_code_raw[0]) + try: + print(int(exit_code_raw[0])) + except: + pass + + exit_code = int(exit_code_raw[0]) + current_jobs[job_name]['exit_code'] = exit_code + subprocess.call(f"docker logs {job_name} >& /data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/synthesize_test_cases_docker.log", shell=True) + subprocess.call(f"docker container rm {job_name}", shell=True) + + current_jobs[job_name]['output_files_saved']['/data/test_cases/metrics_library/all_official_versions.csv'] = 0 + current_jobs[job_name]['output_files_saved'][f"/data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/synthesize_test_cases_docker.log"] = 0 + current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys()) + current_jobs[job_name]['status'] = 'Ready for Eval Plots' + + if current_jobs[job_name]['status'] == 'Ready for Eval Plots': + job_name = current_jobs[job_name]['job_name'] + nice_name = current_jobs[job_name]['nice_name'] + previous_major_fim_version = current_jobs[job_name]['previous_major_fim_version'] + + # Kick off the new job as 
a docker container to run eval plots + print(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/eval_plots.py -m /data/test_cases/metrics_library/all_official_versions.csv -w /data/test_cases/metrics_library/all_official_versions_viz -v {previous_major_fim_version} {nice_name} -sp") + subprocess.call(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/eval_plots.py -m /data/test_cases/metrics_library/all_official_versions.csv -w /data/test_cases/metrics_library/all_official_versions_viz -v {previous_major_fim_version} {nice_name} -sp", shell=True) + current_jobs[job_name]['container_state'] = 'running' + current_jobs[job_name]['status'] = 'Running Eval Plots' + + # Once the Docker container is done, save outputs + if current_jobs[job_name]['status'] == 'Running Eval Plots' and current_jobs[job_name]['container_state'] == 'exited': + # Get container exit code, get the docker log, and then remove container + exit_code_raw = os.popen(f"docker inspect {job_name}" + " --format='{{.State.ExitCode}}'").read().splitlines() + + print("Exit code") + print(exit_code_raw) + print(exit_code_raw[0]) + try: + print(int(exit_code_raw[0])) + except: + pass + + exit_code = int(exit_code_raw[0]) + current_jobs[job_name]['exit_code'] = exit_code + subprocess.call(f"docker logs {job_name} >& /data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/eval_plots_docker.log", shell=True) + subprocess.call(f"docker container rm {job_name}", shell=True) + + current_jobs[job_name]['output_files_saved'][f"/data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/eval_plots_docker.log"] = 0 + for path, folders, files in os.walk('/data/test_cases/metrics_library/all_official_versions_viz'): + for file in files: + current_jobs[job_name]['output_files_saved'][os.path.join(path, file)] = 0 + current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys()) + + current_jobs[job_name]['status'] = 'Ready for Generate Categorical FIM' + + if current_jobs[job_name]['status'] == 'Ready for Generate Categorical FIM': + job_name = current_jobs[job_name]['job_name'] + nice_name = current_jobs[job_name]['nice_name'] + parallel_jobs = current_jobs[job_name]['parallel_jobs'] + + # Kick off the new job as a docker container to run CatFIM + print(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/generate_categorical_fim.py -f {nice_name} -j {parallel_jobs}") + subprocess.call(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/generate_categorical_fim.py -f /data/previous_fim/{nice_name} -j {parallel_jobs}", shell=True) + current_jobs[job_name]['container_state'] = 'running' + current_jobs[job_name]['status'] = 'Running Generate Categorical FIM' + + # Once the Docker container is done, save outputs + if current_jobs[job_name]['status'] == 'Running Generate Categorical FIM' and current_jobs[job_name]['container_state'] == 'exited': + # Get container exit code, get the docker log, and then remove container + exit_code_raw = os.popen(f"docker inspect {job_name}" + " --format='{{.State.ExitCode}}'").read().splitlines() + + print("Exit code") + print(exit_code_raw) + print(exit_code_raw[0]) + 
try: + print(int(exit_code_raw[0])) + except: + pass + + exit_code = int(exit_code_raw[0]) + current_jobs[job_name]['exit_code'] = exit_code + subprocess.call(f"docker logs {job_name} >& /data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/generate_categorical_fim.log", shell=True) + subprocess.call(f"docker container rm {job_name}", shell=True) + + os.makedirs(f"/data/catfim/{current_jobs[job_name]['nice_name']}_temp") + if os.path.isdir(f"/data/catfim/{current_jobs[job_name]['nice_name']}"): + for path in glob.glob(f"/data/catfim/{current_jobs[job_name]['nice_name']}/**/mapping/*", recursive=True): + if not os.path.isdir(path): + shutil.move(path, f"/data/catfim/{current_jobs[job_name]['nice_name']}_temp") + filename = os.path.basename(path) + current_jobs[job_name]['output_files_saved'][os.path.join(f"/data/catfim/{current_jobs[job_name]['nice_name']}", filename)] = 0 + shutil.rmtree(f"/data/catfim/{current_jobs[job_name]['nice_name']}") + shutil.move(f"/data/catfim/{current_jobs[job_name]['nice_name']}_temp", f"/data/catfim/{current_jobs[job_name]['nice_name']}") + + current_jobs[job_name]['output_files_saved'][f"/data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/generate_categorical_fim.log"] = 0 + current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys()) + current_jobs[job_name]['status'] = 'Ready to Save File' + + # Trigger connector to transmit the outputs to the output_handler + # If the output_handler is offline, it will keep retrying until the output_handler is online + if current_jobs[job_name]['status'] == 'Ready to Save File' and (shared_data['current_saving_job'] == '' or shared_data['current_saving_job'] == current_jobs[job_name]): + print(f"{job_name} ready for output handler") + + shared_data['current_saving_job'] = current_jobs[job_name] + current_jobs[job_name]['is_actively_saving'] = True + output_to_save = {} + for path in current_jobs[job_name]['output_files_saved']: + if current_jobs[job_name]['output_files_saved'][path] != -1: + output_to_save = {'path': path, 'chunk_index': current_jobs[job_name]['output_files_saved'][path]} + + if output_to_save != {}: + if shared_data['connected']: + sio.emit('ready_for_output_handler', { + 'nice_name': current_jobs[job_name]['nice_name'], + 'job_name': job_name, + 'path': output_to_save['path'], + 'chunk_index': output_to_save['chunk_index'] + }) + current_jobs[job_name]['status'] = 'Saving File' + + # Once the output_handler is done getting the outputs and the connector deletes the temp repo source, + # mark as completed + if current_jobs[job_name]['status'] == 'Saving File': + is_done = True + for path in current_jobs[job_name]['output_files_saved']: + if current_jobs[job_name]['output_files_saved'][path] != -1: + is_done = False + break + + if is_done: + print("output_handler finished, deleted temp source files and output files") + temp_path = f"/data/temp/{job_name}" + if os.path.isdir(temp_path): + shutil.rmtree(temp_path) + + outputs_path = f"/data/outputs/{current_jobs[job_name]['nice_name']}" + if current_jobs[job_name]['job_type'] == 'release': + outputs_path = f"/data/previous_fim/{current_jobs[job_name]['nice_name']}" + destination = f"/data/viz/{current_jobs[job_name]['nice_name']}" + + if os.path.isdir(destination): + shutil.rmtree(destination) + if os.path.isdir(f"{outputs_path}/aggregate_fim_outputs"): shutil.move(f"{outputs_path}/aggregate_fim_outputs", destination) + if os.path.isdir(f"{outputs_path}/logs"): 
shutil.move(f"{outputs_path}/logs", f"{destination}/logs") + if os.path.isdir(f"/data/catfim/{current_jobs[job_name]['nice_name']}"): shutil.move(f"/data/catfim/{current_jobs[job_name]['nice_name']}", f"{destination}/catfim") + + if os.path.isdir(outputs_path): + shutil.rmtree(outputs_path) + if current_jobs[job_name]['job_type'] == 'release': + if os.path.isdir(f"/data/catfim/{current_jobs[job_name]['nice_name']}"): + shutil.rmtree(f"/data/catfim/{current_jobs[job_name]['nice_name']}") + try: + os.makedirs(outputs_path) + except: + pass + + current_jobs[job_name]['status'] = 'Completed' if current_jobs[job_name]['exit_code'] == 0 else 'Error' + + shared_data['current_saving_job'] = '' + current_jobs[job_name]['is_actively_saving'] = False + print(f"{job_name} completed") + # TODO: Insert Slack notification here for finished job + + # Remove job from list after it's been completed for more than 15 minutes + if (current_jobs[job_name]['status'] == 'Completed' or current_jobs[job_name]['status'] == 'Error') and \ + time.time() >= current_jobs[job_name]['time_started'] + current_jobs[job_name]['time_elapsed'] + 900: + print(f"{job_name} removed from job list") + jobs_to_delete.append(job_name) + + for job in jobs_to_delete: + del current_jobs[job] + + presets_list = [] + for path, folders, files in os.walk(f"/data/inputs/huc_lists"): + for file in files: + presets_list.append(file) + + # Send updates to the connector and write job progress to file + job_updates = [ { + 'job_name': job['job_name'], + 'nice_name': job['nice_name'], + 'status': job['status'], + 'exit_code': job['exit_code'], + 'time_elapsed': job['time_elapsed'], + 'total_output_files_length': job['total_output_files_length'], + 'current_output_files_saved_length': job['current_output_files_saved_length'], + } for job in current_jobs.values()] + + if shared_data['connected']: sio.emit('update', {'jobUpdates': job_updates, 'presetsList': presets_list}) + with open('/data/outputs/current_jobs.json.temp', 'w') as f: + json.dump(current_jobs, f) + shutil.move('/data/outputs/current_jobs.json.temp', '/data/outputs/current_jobs.json') + +sio = socketio.Client() + +@sio.event +def connect(): + print("Update Loop Connected!") + sio.emit('updater_connected') + shared_data['connected'] = True + +@sio.event +def disconnect(): + print('disconnected from server') + shared_data['connected'] = False + +@sio.on('add_job_to_queue') +def ws_add_job_to_queue(data): + job_type = data['job_type'] + if job_type == 'fim_run': + job_name = data['job_name'] + branch = data['branch'] + hucs = data['hucs'] + parallel_jobs = data['parallel_jobs'] + hucs_type = data['hucs_type'] + extent = data['extent'] + config_path = data['config_path'] + dev_run = data['dev_run'] + viz_run = data['viz_run'] + + # This is a preset list instead of a custom list of hucs + if hucs_type == 0: + if os.path.exists(hucs): + with open(hucs, "r") as preset_file: + hucs_raw = preset_file.read().splitlines() + parallel_jobs = len(hucs_raw) + hucs_type = len(hucs_raw[0]) + + parallel_jobs = parallel_jobs if parallel_jobs <= MAX_ALLOWED_CPU_CORES else MAX_ALLOWED_CPU_CORES + + buffer_jobs.append({ + 'job_type': job_type, + 'job_name': job_name, + 'branch': branch, + 'hucs': hucs, + 'parallel_jobs': parallel_jobs, + 'hucs_type': hucs_type, + 'extent': extent, + 'config_path': config_path, + 'dev_run': dev_run, + 'viz_run': viz_run, + 'nice_name': re.search(r"apijob_(.+)_apijob.+", job_name).group(1), + 'status': 'In Queue', + 'time_started': 0, + 'time_elapsed': 0, + 
'output_files_saved': {}, + 'total_output_files_length': 0, + 'current_output_files_saved_length': 0, + 'output_files_saved': {}, + 'container_state': 'running', + 'exit_code': 0, + 'is_actively_saving': False + }) + elif job_type == 'release': + job_name = data['job_name'] + hucs = data['hucs'] + extent = data['extent'] + branch = 'dev' + config_path = './foss_fim/config/params_template.env' + dev_run = False + viz_run = True + previous_major_fim_version = data['previous_major_fim_version'] + + if os.path.exists(hucs): + with open(hucs, "r") as preset_file: + hucs_raw = preset_file.read().splitlines() + parallel_jobs = len(hucs_raw) + hucs_type = len(hucs_raw[0]) + + parallel_jobs = parallel_jobs if parallel_jobs <= MAX_ALLOWED_CPU_CORES else MAX_ALLOWED_CPU_CORES + + buffer_jobs.append({ + 'job_type': job_type, + 'job_name': job_name, + 'branch': branch, + 'hucs': hucs, + 'parallel_jobs': parallel_jobs, + 'hucs_type': hucs_type, + 'extent': extent, + 'config_path': config_path, + 'dev_run': dev_run, + 'viz_run': viz_run, + 'nice_name': re.search(r"apijob_(.+)_apijob.+", job_name).group(1), + 'status': 'In Queue', + 'time_started': 0, + 'time_elapsed': 0, + 'output_files_saved': {}, + 'total_output_files_length': 0, + 'current_output_files_saved_length': 0, + 'output_files_saved': {}, + 'container_state': 'running', + 'exit_code': 0, + 'is_actively_saving': False, + 'previous_major_fim_version': previous_major_fim_version + }) + +@sio.on('remove_job_from_queue') +def ws_remove_job_from_queue(data): + job_name = data['job_name'] + buffer_remove_jobs.append({'job_name': job_name}) + +# If the output_handler is offline, try the saving process again +@sio.on('retry_saving_files') +def ws_retry_saving_files(): + print('saving files failed, retrying') + for job_name in current_jobs: + if current_jobs[job_name]['status'] == "Saving File": + for path in current_jobs[job_name]['output_files_saved']: + if current_jobs[job_name]['output_files_saved'][path] != -1: + current_jobs[job_name]['output_files_saved'][path] = 0 + + current_jobs[job_name]['status'] = 'Ready to Save File' + +@sio.on('file_chunk_saved') +def ws_file_chunk_saved(data): + job_name = data['job_name'] + file_path = data['file_path'] + + current_jobs[job_name]['output_files_saved'][file_path] += 1 + current_jobs[job_name]['status'] = 'Ready to Save File' + +@sio.on('file_saved') +def ws_file_saved(data): + job_name = data['job_name'] + file_path = data['file_path'] + + current_jobs[job_name]['output_files_saved'][file_path] = -1 + current_jobs[job_name]['current_output_files_saved_length'] += 1 + current_jobs[job_name]['status'] = 'Ready to Save File' + +sio.connect('http://fim_node_connector:6000/') +update_loop() diff --git a/config/params_calibrated.env b/config/params_calibrated.env deleted file mode 100644 index 09e7f0167..000000000 --- a/config/params_calibrated.env +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -#### geospatial parameters #### -export negativeBurnValue=1000 -export buffer=70 -export maxSplitDistance_meters=1500 -export manning_n="/foss_fim/config/mannings_calibrated.json" -export stage_min_meters=0 -export stage_interval_meters=0.3048 -export stage_max_meters=25 -export slope_min=0.001 -export ms_buffer_dist=7000 -export lakes_buffer_dist_meters=20 - -#### computational parameters #### -export ncores_gw=1 # mpi number of cores for gagewatershed -export ncores_fd=1 # mpi number of cores for flow directions -export defaultMaxJobs=1 # default number of max concurrent jobs to run -export memfree=0G # min free 
memory required to start a new job or keep youngest job alive - -#### logging parameters #### -export startDiv="\n##########################################################################\n" -export stopDiv="\n##########################################################################" diff --git a/config/params_template.env b/config/params_template.env index f87b4c15a..41df712c6 100644 --- a/config/params_template.env +++ b/config/params_template.env @@ -1,21 +1,55 @@ #!/bin/bash +#### hydroconditioning parameters #### +export negative_burn_value=1000 +export agree_DEM_buffer=70 +export wbd_buffer=5000 +export thalweg_lateral_elev_threshold=3 + #### geospatial parameters #### -export negativeBurnValue=1000 -export buffer=70 -export maxSplitDistance_meters=1500 +export max_split_distance_meters=1500 +export ms_buffer_dist=7000 +export lakes_buffer_dist_meters=20 + +#### rating curve parameters #### export manning_n="/foss_fim/config/mannings_default.json" export stage_min_meters=0 export stage_interval_meters=0.3048 export stage_max_meters=25 export slope_min=0.001 -export ms_buffer_dist=7000 -export lakes_buffer_dist_meters=20 +export min_catchment_area=0.25 +export min_stream_length=0.5 + +#### bathy SRC estimation parameters #### +export bathy_src_toggle=True # Toggle to run BARC routine (True=on; False=off) +export bankfull_input_table="data/inputs/bathymetry/nwm_flow_bieger_qreg.csv" # input file location with feature_id and channel geometry attributes +# Option 1: Bieger et al. 2015 discharge regression --> nwm_flow_bieger_qreg.csv +# Option 2: Bieger et al. 2015 drainage area regression (obtained from Wieczorek (2018) database) --> BANKFULL_CONUS.txt +# Option 3: NWM Route Link bankfull geometry (Blackburn-Lynch regression) --> nwm_route_link_geom_BED.csv +export src_plot_option="False" # optional toggle to create SRC comparison plots for each hydroid (Warning: longer run times) +export surf_area_thalweg_ratio_flag=10 # Flag: Surface area ratio value to identify possible thalweg notch "jump" (SA x+1 / SA x) +export thalweg_stg_search_max_limit=3 # Threshold: Stage value limit below which to look for the surface area ratio flag (only flag thalweg notch below this threshold) +export bathy_xs_area_chg_flag=1 # Flag: Cross section area limit to cap the amount of bathy XS area added to the SRC. Limits the bathy_calc_xs_area/ BANKFULL_XSEC_AREA to the specified threshold +export bankful_xs_area_ratio_flag=10 # Flag: Identify bogus BARC adjusted values where the regression bankfull XS Area/SRC bankfull area is > threshold (topwidth crosswalk issues or bad bankfull regression data points??) +export thalweg_hyd_radius_flag=10 # Flag: Idenitify possible erroneous BARC-adjusted hydraulic radius values. 
BARC discharge values greater than the specified threshold and within the thal_stg_limit are set to 0 +export ignore_streamorders=10 # Ignore BARC calculations for streamorders >= this value (10 is Mississippi R) + +#### estimating bankfull stage in SRCs #### +export src_bankfull_toggle="True" # Toggle to run identify_bankfull routine (True=on; False=off) +export src_bankfull_plot_option="False" # optional toggle to create SRC comparison plots for each hydroid (Warning: longer run times) +export bankfull_flows_file="data/inputs/rating_curve/bankfull_flows/nwm_v2_0_recurr_1_5_cms.csv" # input file location with nwm feature_id and recurrence flow values + +#### applying variable/composite roughness curve to SRCs #### +export src_vrough_toggle="True" # Toggle to run composite roughness src routine (True=on; False=off) +export src_vrough_plot_option="False" # optional toggle to create SRC comparison plots for each hydroid (Warning: longer run times) +export vrough_suffix="_vmann" # text to append to output log and src_full_crosswalked file names +export vmann_input_file="data/inputs/rating_curve/variable_roughness/mannings_global_06_011.csv" # input file location with nwm feature_id and channel roughness and overbank roughness attributes +export bankfull_attribute="chann_volume_ratio" # src_full_crosswalked_bankfull.csv attribute (column id) containing the channel vs overbank ratio values (generated in the identify_src_bankfull.py) #### computational parameters #### export ncores_gw=1 # mpi number of cores for gagewatershed export ncores_fd=1 # mpi number of cores for flow directions -export defaultMaxJobs=1 # default number of max concurrent jobs to run +export default_max_jobs=1 # default number of max concurrent jobs to run export memfree=0G # min free memory required to start a new job or keep youngest job alive #### logging parameters #### diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md new file mode 100644 index 000000000..3ce00ed1d --- /dev/null +++ b/docs/CHANGELOG.md @@ -0,0 +1,901 @@ +All notable changes to this project will be documented in this file. +We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. + +## v3.0.24.3 - 2021-11-29 - [PR #488](https://github.com/NOAA-OWP/cahaba/pull/488) + +Fixed projection issue in `synthesize_test_cases.py`. + +## Changes + +- `Pipfile`: Added `Pyproj` to `Pipfile` to specify a version that did not have the current projection issues. + +

+ +## v3.0.24.2 - 2021-11-18 - [PR #486](https://github.com/NOAA-OWP/cahaba/pull/486) + +Adding a new check to keep `usgs_elev_table.csv`, `src_base.csv`, `small_segments.csv` for runs not using the `-viz` flag. We unintentionally deleted some .csv files in `vary_mannings_n_composite.py` but need to maintain some of these for non `-viz` runs (e.g. `usgs_elev_table.csv` is used for sierra test input). + +## Changes + +- `fim_run.sh`: passing `-v` flag to `vary_mannings_n_composite.py` to determine which csv files to delete. Setting `$viz` = 0 for non `-v` runs. +- `src/vary_mannings_n_composite.py`: added `-v` input arg and if statement to check which .csv files to delete. +- `src/add_crosswalk.py`: removed deprecated barc variables from input args. +- `src/run_by_unit.sh`: removed deprecated barc variables from input args to `add_crosswalk.py`. + +

+ +## v3.0.24.1 - 2021-11-17 - [PR #484](https://github.com/NOAA-OWP/cahaba/pull/484) + +Patch to clean up unnecessary files and create better names for intermediate raster files. + +## Removals + +- `tools/run_test_case_gms.py`: Unnecessary file. + +## Changes + +- `tools/composite_ms_fr_inundation.py`: Clean up documentation and intermediate file names. +- `tools/run_test_case.py`: Remove unnecessary imports. + +

+ +## v3.0.24.0 - 2021-11-08 - [PR #482](https://github.com/NOAA-OWP/cahaba/pull/482) + +Adds `composite_ms_fr_inundation.py` to allow for the generation of an inundation map given a "flow file" CSV and full-resolution (FR) and mainstem (MS) relative elevation models, synthetic rating curves, and catchments rasters created by the `fim_run.sh` script. + +## Additions +- `composite_ms_fr_inundation.py`: New module that is used to inundate both MS and FR FIM and composite the two inundation rasters. +- `/tools/gms_tools/`: Three modules (`inundate_gms.py`, `mosaic_inundation.py`, `overlapping_inundation.py`) ported from the GMS branch used to composite inundation rasters. + +## Changes +- `inundation.py`: Added 2 exception classes ported from the GMS branch. + +

+ +## v3.0.23.3 - 2021-11-04 - [PR #481](https://github.com/NOAA-OWP/cahaba/pull/481) +Includes additional hydraulic properties to the `hydroTable.csv`: `Number of Cells`, `SurfaceArea (m2)`, `BedArea (m2)`, `Volume (m3)`, `SLOPE`, `LENGTHKM`, `AREASQKM`, `Roughness`, `TopWidth (m)`, `WettedPerimeter (m)`. Also adds `demDerived_reaches_split_points.gpkg`, `flowdir_d8_burned_filled.tif`, and `dem_thalwegCond.tif` to `-v` whitelist. + +## Changes +- `run_by_unit.sh`: Added `EXIT FLAG` tag and previous non-zero exit code tag to the print statement to allow log lookup. +- `add_crosswalk.py`: Added extra attributes to the hydroTable.csv. Includes a default `barc_on` and `vmann_on` (=False) attribute that is overwritten (=True) if SRC post-processing modules are run. +- `bathy_src_adjust_topwidth.py`: Overwrites the `barc_on` attribute where applicable and includes the BARC-modified Volume property. +- `vary_mannings_n_composite.py`: Overwrites the `vmann_on` attribute where applicable. +- `output_cleanup.py`: Adds new files to the `-v` whitelist. + +

+ +## v3.0.23.2 - 2021-11-04 - [PR #480](https://github.com/NOAA-OWP/cahaba/pull/480) +Hotfix for `vary_mannings_n_composite.py` to address null discharge values for non-CONUS HUCs. + +## Changes +- `vary_mannings_n_composite.py`: Add numpy where clause to set the final discharge value to the original value if `vmann=False`. + 
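A minimal sketch of the numpy where clause described above is shown below. The column names (`vmann_on`, `vmann_discharge_cms`, `discharge_cms`) are illustrative assumptions, not necessarily the exact fields used in `vary_mannings_n_composite.py`.

```python
import numpy as np
import pandas as pd

# Hypothetical hydroTable-like frame; column names are assumptions.
df = pd.DataFrame({
    "discharge_cms": [1.2, 3.4, 5.6],
    "vmann_discharge_cms": [1.1, np.nan, np.nan],
    "vmann_on": [True, False, False],
})

# Fall back to the original discharge wherever the variable-roughness
# value was not computed (vmann is False) or is null.
df["final_discharge_cms"] = np.where(
    df["vmann_on"] & df["vmann_discharge_cms"].notna(),
    df["vmann_discharge_cms"],
    df["discharge_cms"],
)
print(df)
```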

+ +## v3.0.23.1 - 2021-11-03 - [PR #479](https://github.com/NOAA-OWP/cahaba/pull/479) +Patches the API updater. The `params_calibrated.env` is replaced with `params_template.env` because the BARC and Multi-N modules supplant the calibrated values. + +## Changes +- `api/node/updater/updater.py`: Changed `params_calibrated.env` to `params_template.env` + +

+ +## v3.0.23.0 - 2021-10-31 - [PR #475](https://github.com/NOAA-OWP/cahaba/pull/475) + +Moved the synthetic rating curve (SRC) processes from the `\tools` directory to the `\src` directory to support post-processing in `fim_run.sh`. These SRC post-processing modules will now run as part of the default `fim_run.sh` workflow. Reconfigured the bathymetry adjusted rating curve (BARC) module to use the 1.5yr flow from NWM v2 recurrence flow data in combination with the Bieger et al. (2015) regression equations with bankfull discharge as the predictor variable input. + +## Additions +- `src/bathy_src_adjust_topwidth.py` --> New version of the bathymetry adjusted rating curve (BARC) module that is configured to use the Bieger et al. (2015) regression equation with input bankfull discharge as the predictor variable (the previous version used the drainage area version of the regression equations). Also added log output capability, added reconfigured output content in `src_full_crosswalked_BARC.csv` and `hydroTable.csv`, and included modifications to allow BARC to run as a post-processing step in `fim_run.sh`. Reminder: BARC is only configured for MS extent. + +## Removals +- `config/params_calibrated.env` --> deprecated the calibrated roughness values by stream order with the introduction of the new variable/composite roughness module +- `src/bathy_rc_adjust.py` --> deprecated the previous BARC version + +## Changes +- `src/identify_src_bankfull.py` --> Moved this script from /tools to /src, added more doc strings, cleaned up output log, and reconfigured to allow execution from fim_run.sh post-processing. +- `src/vary_mannings_n_composite.py` --> Moved this script from /tools to /src, added more doc strings, cleaned up output log, added/reconfigured output content in src_full_crosswalked_vmann.csv and hydroTable.csv, and reconfigured to allow execution from fim_run.sh post-processing. +- `config/params_template.env` --> Added additional parameters/variables for input to `identify_src_bankfull.py`, `vary_mannings_n_composite.py`, and `bathy_src_adjust_topwidth.py`. + - default BARC input: bankfull channel geometry derived from the Bieger et al. (2015) bankfull discharge regression equations + - default bankfull flow input: NWM v2 1.5-year recurrence flows + - default variable roughness input: global (all NWM feature_ids) roughness values of 0.06 for in-channel and 0.11 for max overbank +- `fim_run.sh` --> Added SRC post-processing calls after the `run_by_unit.sh` workflow +- `src/add_crosswalk.py` --> Removed BARC module call (moved to post-processing) +- `src/run_by_unit.sh` --> Removed old/unnecessary print statement. + - **Note: reset exit codes to 0 for unnecessary processing flags.** Non-zero error codes in `run_by_unit.sh` prevent the `fim_run.sh` post-processing steps from running. This error handling issue will be handled more appropriately in a soon-to-be-released enhancement. +- `tools/run_test_case.py` --> Reverted changes used during the development process + 

+ +## v3.0.22.8 - 2021-10-26 - [PR #471](https://github.com/NOAA-OWP/cahaba/pull/471) + +Manually filtering segments from stream input layer to fix flow reversal of the MS River (HUC 08030100). + +## Changes +- `clip_vectors_to_wbd.py`: Fixes bug where flow direction is reversed for HUC 08030100. The issue is resolved by filtering incoming stream segments that intersect with the elevation grid boundary. + +

+ +## v3.0.22.7 - 2021-10-08 - [PR #467](https://github.com/NOAA-OWP/cahaba/pull/467) + +These "tool" enhancements 1) delineate in-channel vs. out-of-channel geometry to allow more targeted development of key physical drivers influencing the SRC calculations (e.g. bathymetry & Manning’s n) #418 and 2) apply a variable/composite Manning’s roughness (n) using a user-provided csv with in-channel vs. overbank roughness values #419 & #410. + +## Additions +- `identify_src_bankfull.py`: new post-processing tool that ingests a flow csv (e.g. NWM 1.5yr recurr flow) to approximate the bankfull STG and then calculate the channel vs. overbank proportions using the volume and hydraulic radius variables +- `vary_mannings_n_composite.py`: new post-processing tool that ingests a csv containing feature_id, channel roughness, and overbank roughness and then generates composite n values via the channel ratio variable + +## Changes +- `eval_plots.py`: modified the plot legend text to display the full label for development tests +- `inundation.py`: added a new optional argument (-n) and corresponding function to produce a csv containing the stage value (and SRC variables) calculated from the flow to stage interpolation. + 
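For illustration, a composite roughness can be formed as a channel-ratio-weighted blend of the in-channel and overbank n values. This is a sketch of the idea only; the exact formula and column names used in `vary_mannings_n_composite.py` may differ.

```python
import pandas as pd

def composite_mannings_n(src: pd.DataFrame) -> pd.Series:
    """Blend channel and overbank roughness by the channel ratio (sketch).

    Assumed columns: chann_volume_ratio (0-1), channel_n, overbank_n.
    """
    ratio = src["chann_volume_ratio"].clip(0, 1)
    return ratio * src["channel_n"] + (1 - ratio) * src["overbank_n"]

src = pd.DataFrame({
    "chann_volume_ratio": [1.0, 0.6, 0.2],  # fully in-channel through mostly overbank
    "channel_n": [0.06, 0.06, 0.06],
    "overbank_n": [0.11, 0.11, 0.11],
})
src["composite_n"] = composite_mannings_n(src)
print(src)
```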

+ +## v3.0.22.6 - 2021-09-13 - [PR #462](https://github.com/NOAA-OWP/cahaba/pull/462) + +This new workflow ingests FIM point observations from users and “corrects” the synthetic rating curves to produce the desired FIM extent at locations where feedback is available (locally calibrate FIM). + +## Changes +- `add_crosswalk.py`: added `NextDownID` and `order_` attributes to the exported `hydroTable.csv`. This will potentially be used in future enhancements to extend SRC changes to upstream/downstream catchments. +- `adjust_rc_with_feedback.py`: added a new workflow to perform the SRC modifications (revised discharge) using the existing HAND geometry variables combined with the user provided point location flow and stage data. +- `inundation_wrapper_custom_flow.py`: updated code to allow for huc6 processing to generate custom inundation outputs. + +

+ +## v3.0.22.5 - 2021-09-08 - [PR #460](https://github.com/NOAA-OWP/cahaba/pull/460) + +Patches an issue where only certain benchmark categories were being used in evaluation. + +## Changes +- In `tools/tools_shared_variables.py`, created a variable `MAGNITUDE_DICT` to store benchmark category magnitudes. +- `synthesize_test_cases.py` imports `MAGNITUDE_DICT` and uses it to assign magnitudes. + +

+ +## v3.0.22.4 - 2021-08-30 - [PR #456](https://github.com/NOAA-OWP/cahaba/pull/456) + +Renames the BARC modified variables that are exported to `src_full_crosswalked.csv` to replace the original variables. The default/original variables are renamed with `orig_` prefix. This change is needed to ensure downstream uses of the `src_full_crosswalked.csv` are able to reference the authoritative version of the channel geometry variables (i.e. BARC-adjust where available). + +## Changes +- In `src_full_crosswalked.csv`, default/original variables are renamed with `orig_` prefix and `SA_div` is renamed to `SA_div_flag`. + +

+ +## v3.0.22.3 - 2021-08-27 - [PR #457](https://github.com/NOAA-OWP/cahaba/pull/457) + +This fixes a bug in the `get_metadata()` function in `/tools/tools_shared_functions.py` that arose because of a WRDS update. Previously the `metadata_source` response was returned as independent variables, but now it is returned as a list of strings. Another issue was observed where the `EVALUATED_SITES_CSV` variable was being misdefined (at least on the development VM) through the OS environmental variable setting. + +## Changes +- In `tools_shared_functions.py`, changed parsing of WRDS `metadata_sources` to account for the new list type. +- In `generate_categorical_fim_flows.py`, changed the way the `EVALUATED_SITES_CSV` path is defined from an OS environmental setting to a relative path that will work within the Docker container. + 
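The parsing change can be sketched as accepting both the old scalar form and the new list-of-strings form. The key name mirrors the description above, but the surrounding WRDS response structure is assumed for illustration.

```python
def normalize_metadata_sources(metadata: dict) -> list:
    """Return metadata_sources as a list regardless of response shape (sketch).

    Older WRDS responses returned a single string; newer responses return
    a list of strings.
    """
    sources = metadata.get("metadata_sources", [])
    if isinstance(sources, str):
        return [sources]
    return list(sources)

print(normalize_metadata_sources({"metadata_sources": "usgs gage"}))
print(normalize_metadata_sources({"metadata_sources": ["usgs gage", "nws lid"]}))
```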

+ +## v3.0.22.2 - 2021-08-26 - [PR #455](https://github.com/NOAA-OWP/cahaba/pull/455) + +This merge addresses an issue with the bathymetry adjusted rating curve (BARC) calculations exacerbating single-pixel inundation issues for the lower Mississippi River. This fix allows the user to specify a stream order value that will be ignored in BARC calculations (reverts to using the original/default rating curve). If/when the "thalweg notch" issue is addressed, this change may be unmade. + +## Changes +- Added new env variable `ignore_streamorders` set to 10. +- Added new BARC code to set the bathymetry adjusted cross-section area to 0 (reverts to using the default SRC values) based on the streamorder env variable. + 
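A sketch of the stream-order guard is shown below; the column names are assumptions for illustration and the actual logic lives in the BARC module.

```python
import numpy as np
import pandas as pd

IGNORE_STREAMORDERS = 10  # params_template.env: ignore_streamorders

def zero_barc_area_for_large_rivers(src: pd.DataFrame) -> pd.DataFrame:
    """Revert to the default SRC for very large rivers (sketch).

    Setting the bathymetry-adjusted cross-section area to 0 means no extra
    area is added, so the original rating curve is effectively used.
    Column names are illustrative.
    """
    src = src.copy()
    src["bathy_calc_xs_area"] = np.where(
        src["order_"] >= IGNORE_STREAMORDERS, 0.0, src["bathy_calc_xs_area"]
    )
    return src
```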

+ +## v3.0.22.1 - 2021-08-20 - [PR #447](https://github.com/NOAA-OWP/cahaba/pull/447) + +Patches the minimum stream length in the template parameters file. + +## Changes +- Changes `max_split_distance_meters` in `params_template.env` to 1500. + +

+ +## v3.0.22.0 - 2021-08-19 - [PR #444](https://github.com/NOAA-OWP/cahaba/pull/444) + +This adds a script, `adjust_rc_with_feedback.py`, that will be expanded in future issues. The primary function that performs the HAND value and hydroid extraction is `ingest_points_layer()`, but this may change as the overall synthetic rating curve automatic update mechanism evolves. + +## Additions +- Added `adjust_rc_with_feedback.py` with `ingest_points_layer()`, a function to extract HAND and hydroid values for use in an automatic synthetic rating curve updating mechanism. + 
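One plausible shape for the HAND/HydroID extraction is sketched below using geopandas and rasterio. The paths, layer fields, and spatial join are assumptions for illustration, not the actual implementation in `adjust_rc_with_feedback.py`.

```python
import geopandas as gpd
import rasterio

def ingest_points_layer_sketch(points_path, hand_raster_path, catchments_path):
    """Attach HAND values and HydroIDs to user-supplied points (sketch)."""
    points = gpd.read_file(points_path)
    catchments = gpd.read_file(catchments_path)

    with rasterio.open(hand_raster_path) as hand:
        points = points.to_crs(hand.crs)
        catchments = catchments.to_crs(hand.crs)
        coords = [(geom.x, geom.y) for geom in points.geometry]
        # DatasetReader.sample yields one value array per point.
        points["hand_m"] = [sample[0] for sample in hand.sample(coords)]

    # Pick up the HydroID of the catchment intersecting each point.
    points = gpd.sjoin(points, catchments[["HydroID", "geometry"]], how="left")
    return points
```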

+ +## v3.0.21.0 - 2021-08-18 - [PR #433](https://github.com/NOAA-OWP/cahaba/pull/433) + +General repository cleanup, made memory-profiling an optional flag, API's release feature now saves outputs. + +## Changes +- Remove `Dockerfile.prod`, rename `Dockerfile.dev` to just `Dockerfile`, and remove `.dockerignore`. +- Clean up `Dockerfile` and remove any unused* packages or variables. +- Remove any unused* Python packages from the `Pipfile`. +- Move the `CHANGELOG.md`, `SECURITY.md`, and `TERMS.md` files to the `/docs` folder. +- Remove any unused* scripts in the `/tools` and `/src` folders. +- Move `tools/preprocess` scripts into `tools/`. +- Ensure all scripts in the `/src` folder have their code in functions and are being called via a `__main__` function (This will help with implementing memory profiling fully). +- Changed memory-profiling to be an option flag `-m` for `fim_run.sh`. +- Updated FIM API to save all outputs during a "release" job. + +

+ +## v3.0.20.2 - 2021-08-13 - [PR #443](https://github.com/NOAA-OWP/cahaba/pull/443) + +This merge modifies `clip_vectors_to_wbd.py` to check for relevant input data. + +## Changes +- `clip_vectors_to_wbd.py` now checks that there are NWM stream segments within the buffered HUC boundary. +- `included_huc8_ms.lst` has several additional HUC8s. + +

+ +## v3.0.20.1 - 2021-08-12 - [PR #442](https://github.com/NOAA-OWP/cahaba/pull/442) + +This merge improves documentation in various scripts. + +## Changes +This PR better documents the following: + +- `inundate_nation.py` +- `synthesize_test_cases.py` +- `adjust_thalweg_lateral.py` +- `rem.py` + +

+ +## v3.0.20.0 - 2021-08-11 - [PR #440](https://github.com/NOAA-OWP/cahaba/pull/440) + +This merge adds two new scripts into `/tools/` for use in QAQC. + +## Additions +- `inundate_nation.py` to produce inundation maps for the entire country for use in QAQC. +- `check_deep_flooding.py` to check for depths of inundation greater than a user-supplied threshold at specific areas defined by a user-supplied shapefile. + +
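A minimal version of the deep-flooding check could mask the depth grid with the user-supplied polygons and count cells above the threshold. The function signature below is an assumption, not the actual interface of `check_deep_flooding.py`.

```python
import geopandas as gpd
import numpy as np
import rasterio
from rasterio.mask import mask

def count_deep_cells(depth_raster, aoi_shapefile, threshold_m=3.0):
    """Count inundated cells deeper than threshold_m inside the AOI (sketch)."""
    aoi = gpd.read_file(aoi_shapefile)
    with rasterio.open(depth_raster) as src:
        aoi = aoi.to_crs(src.crs)
        nodata = src.nodata
        depths, _ = mask(src, aoi.geometry, crop=True)
    depths = depths[0].astype("float32")
    valid = np.isfinite(depths) if nodata is None else depths != nodata
    return int(np.count_nonzero(valid & (depths > threshold_m)))
```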

+ +## v3.0.19.5 - 2021-07-19 + +Updating `README.md`. + +

+ +## v3.0.19.4 - 2021-07-13 - [PR #431](https://github.com/NOAA-OWP/cahaba/pull/431) + +Updating logging and fixing bug in vector preprocessing. + +## Additions +- `fim_completion_check.py` adds message to docker log to log any HUCs that were requested but did not finish `run_by_unit.sh`. +- Adds `input_data_edits_changelog.txt` to the inputs folder to track any manual or version/location specific changes that were made to data used in FIM 3. + +## Changes +- Provides unique exit codes to relevant domain checkpoints within `run_by_unit.sh`. +- Bug fixes in `reduce_nhd_stream_density.py`, `mprof plot` call. +- Improved error handling in `add_crosswalk.py`. + +

+ +## v3.0.19.3 - 2021-07-09 + +Hot fix to `synthesize_test_cases`. + +## Changes +- Fixed if/elif/else statement in `synthesize_test_cases.py` that resulted in only IFC data being evaluated. + +

+ +## v3.0.19.2 - 2021-07-01 - [PR #429](https://github.com/NOAA-OWP/cahaba/pull/429) + +Updates to evaluation scripts to allow for Alpha testing at Iowa Flood Center (IFC) sites. Also, `BAD_SITES` variable updates to omit sites not suitable for evaluation from metric calculations. + +## Changes +- The `BAD_SITES` list in `tools_shared_variables.py` was updated and reasons for site omission are documented. +- Refactored `run_test_case.py`, `synthesize_test_cases.py`, `tools_shared_variables.py`, and `eval_plots.py` to allow for IFC comparisons. + +

+ +## v3.0.19.1 - 2021-06-17 - [PR #417](https://github.com/NOAA-OWP/cahaba/pull/417) + +Adding a thalweg profile tool to identify significant drops in thalweg elevation. Also setting lateral thalweg adjustment threshold in hydroconditioning. + +## Additions +- `thalweg_drop_check.py` checks the elevation along the thalweg for each stream path downstream of MS headwaters within a HUC. + +## Removals +- Removing `dissolveLinks` arg from `clip_vectors_to_wbd.py`. + +## Changes +- Cleaned up code in `split_flows.py` to make it more readable. +- Refactored `reduce_nhd_stream_density.py` and `adjust_headwater_streams.py` to limit MS headwater points in `agg_nhd_headwaters_adj.gpkg`. +- Fixed a bug in `adjust_thalweg_lateral.py` lateral elevation replacement threshold; changed threshold to 3 meters. +- Updated `aggregate_vector_inputs.py` to log intermediate processes. + +

+ +## v3.0.19.0 - 2021-06-10 - [PR #415](https://github.com/NOAA-OWP/cahaba/pull/415) + +Feature to evaluate performance of alternative CatFIM techniques. + +## Additions +- Added `eval_catfim_alt.py` to evaluate performance of alternative CatFIM techniques. + +

+## v3.0.18.0 - 2021-06-09 - [PR #404](https://github.com/NOAA-OWP/cahaba/pull/404) + +To help analyze the memory consumption of the FIM Run process, the Python module `memory-profiler` has been added to give insights into where peak memory usage is within the codebase. + +In addition, the Dockerfile was previously broken due to the Taudem dependency removing the version that was previously being used by FIM. To fix this, and allow new Docker images to be built, the Taudem version has been updated to the newest version on the GitHub repo and thus needs to be thoroughly tested to determine if this new version has affected the overall FIM outputs. + +## Additions +- Added `memory-profiler` to `Pipfile` and `Pipfile.lock`. +- Added an `mprof` (memory-profiler CLI utility) call to `time_and_tee_run_by_unit.sh` to create an overall memory usage graph at `/logs/{HUC}_memory.png` in the outputs directory. +- Added the `@profile` decorator to all functions within scripts used in the `run_by_unit.sh` script to allow for memory usage tracking, which is then recorded in the `/logs/{HUC}.log` file of the outputs directory. + +## Changes +- Changed the Taudem version in `Dockerfile.dev` to `98137bb6541a0d0077a9c95becfed4e56d0aa0ac`. +- Changed all calls to Python scripts in `run_by_unit.sh` to be made with the `-m memory-profiler` argument to allow scripts to also track memory usage. + 
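The profiling hooks described above follow the standard memory-profiler interface; the function below is just a placeholder workload and the output paths are illustrative.

```python
from memory_profiler import profile

@profile
def placeholder_workload():
    # Allocate something measurable so the line-by-line report is non-trivial.
    data = [list(range(10_000)) for _ in range(200)]
    return len(data)

if __name__ == "__main__":
    placeholder_workload()

# Line-by-line report:
#   python3 -m memory_profiler this_script.py
# Whole-run graph (the mprof CLI ships with memory-profiler):
#   mprof run this_script.py
#   mprof plot -o example_memory.png
```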

+## v3.0.17.1 - 2021-06-04 - [PR #395](https://github.com/NOAA-OWP/cahaba/pull/395) + +Bug fix to the `generate_nws_lid.py` script + +## Changes +- Fixes incorrectly assigned attribute field "is_headwater" for some sites in the `nws_lid.gpkg` layer. +- Updated `agg_nhd_headwaters_adj.gpkg`, `agg_nhd_streams_adj.gpkg`, `nwm_flows.gpkg`, and `nwm_catchments.gpkg` input layers using latest NWS LIDs. + +

+## v3.0.17.0 - 2021-06-04 - [PR #393](https://github.com/NOAA-OWP/cahaba/pull/393) +BARC updates to cap the bathy calculated xsec area in `bathy_rc_adjust.py` and allow user to choose input bankfull geometry. + +## Changes + +- Added new env variable to control which input file is used for the bankfull geometry input to bathy estimation workflow. +- Modified the bathymetry cross section area calculation to cap the additional area value so that it cannot exceed the bankfull cross section area value for each stream segment (bankfull value obtained from regression equation dataset). +- Modified the `rating_curve_comparison.py` plot output to always put the FIM rating curve on top of the USGS rating curve (avoids USGS points covering FIM). +- Created a new aggregate csv file (aggregates for all hucs) for all of the `usgs_elev_table.csv` files (one per huc). +- Evaluate the FIM Bathymetry Adjusted Rating Curve (BARC) tool performance using the estimated bankfull geometry dataset derived for the NWM route link dataset. + +
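The capping behavior can be expressed with `np.minimum`. The column names echo the parameter comments in `params_template.env` (`bathy_calc_xs_area`, `BANKFULL_XSEC_AREA`), but the exact implementation may differ, so treat this as a sketch.

```python
import numpy as np
import pandas as pd

def cap_bathy_xs_area(src: pd.DataFrame) -> pd.DataFrame:
    """Limit the added bathymetry area to the regression bankfull area (sketch)."""
    src = src.copy()
    src["bathy_calc_xs_area"] = np.minimum(
        src["bathy_calc_xs_area"], src["BANKFULL_XSEC_AREA"]
    )
    return src
```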

+## v3.0.16.3 - 2021-05-21 - [PR #388](https://github.com/NOAA-OWP/cahaba/pull/388) + +Enhancement and bug fixes to `synthesize_test_cases.py`. + +## Changes +- Addresses a bug where AHPS sites without benchmark data were receiving a CSI of 0 in the master metrics CSV produced by `synthesize_test_cases.py`. +- Includes a feature enhancement to `synthesize_test_cases.py` that allows for the inclusion of user-specified testing versions in the master metrics CSV. +- Removes some of the print statements used by `synthesize_test_cases.py`. + +

+## v3.0.16.2 - 2021-05-18 - [PR #384](https://github.com/NOAA-OWP/cahaba/pull/384) + +Modifications and fixes to `run_test_case.py`, `eval_plots.py`, and AHPS preprocessing scripts. + +## Changes +- Comment out return statement causing `run_test_case.py` to skip over sites/hucs when calculating contingency rasters. +- Move bad sites list and query statement used to filter out bad sites to the `tools_shared_variables.py`. +- Add print statements in `eval_plots.py` detailing the bad sites used and the query used to filter out bad sites. +- Update AHPS preprocessing scripts to produce a domain shapefile. +- Change output filenames produced in ahps preprocessing scripts. +- Update workarounds for some sites in ahps preprocessing scripts. + +

+## v3.0.16.1 - 2021-05-11 - [PR #380](https://github.com/NOAA-OWP/cahaba/pull/380) + +The current version of Eventlet used in the Connector module of the FIM API is outdated and vulnerable. This update bumps the version to the patched version. + +## Changes +- Updated `api/node/connector/requirements.txt` to have the Eventlet version as 0.31.0 + +

+## v3.0.16.0 - 2021-05-07 - [PR #378](https://github.com/NOAA-OWP/cahaba/pull/378) + +New "Release" feature added to the FIM API. This feature will allow for automated FIM, CatFIM, and relevant metrics to be generated when a new FIM Version is released. See [#373](https://github.com/NOAA-OWP/cahaba/issues/373) for more detailed steps that take place in this feature. + +## Additions +- Added new window to the UI in `api/frontend/gui/templates/index.html`. +- Added new job type to `api/node/connector/connector.py` to allow these release jobs to run. +- Added additional logic in `api/node/updater/updater.py` to run the new eval and CatFIM scripts used in the release feature. + +## Changes +- Updated `api/frontend/output_handler/output_handler.py` to allow for copying more broad ranges of file paths instead of only the `/data/outputs` directory. + +

+## v3.0.15.10 - 2021-05-06 - [PR #375](https://github.com/NOAA-OWP/cahaba/pull/375) + +Remove Great Lakes coastlines from WBD buffer. + +## Changes +- `gl_water_polygons.gpkg` layer is used to mask out Great Lakes boundaries and remove NHDPlus HR coastline segments. + +

+## v3.0.15.9 - 2021-05-03 - [PR #372](https://github.com/NOAA-OWP/cahaba/pull/372) + +Generate `nws_lid.gpkg`. + +## Additions +- Generate `nws_lid.gpkg` with attributes indicating if site is a headwater `nws_lid` as well as if it is co-located with another `nws_lid` which is referenced to the same `nwm_feature_id` segment. + +

+## v3.0.15.8 - 2021-04-29 - [PR #371](https://github.com/NOAA-OWP/cahaba/pull/371) + +Refactor NHDPlus HR preprocessing workflow. Resolves issue #238 + +## Changes +- Consolidate NHD streams, NWM catchments, and headwaters MS and FR layers with `mainstem` column. +- HUC8 intersections are included in the input headwaters layer. +- `clip_vectors_to_wbd.py` removes incoming stream segment from the selected layers. + +

+## v3.0.15.7 - 2021-04-28 - [PR #367](https://github.com/NOAA-OWP/cahaba/pull/367) + +Refactor `synthesize_test_cases.py` to handle exceptions during multiprocessing. Resolves issue #351 + +## Changes +- Refactored `inundation.py` and `run_test_case.py` to handle exceptions without using `sys.exit()`. + 

+## v3.0.15.6 - 2021-04-23 - [PR #365](https://github.com/NOAA-OWP/cahaba/pull/365) + +Implement CatFIM threshold flows to Sierra test and add AHPS benchmark preprocessing scripts. + +## Additions +- Produce CatFIM flows file when running `rating_curve_get_usgs_gages.py`. +- Several scripts to preprocess AHPS benchmark data. Requires numerous file dependencies not available through Cahaba. + +## Changes +- Modify `rating_curve_comparison.py` to ingest CatFIM threshold flows in calculations. +- Modify `eval_plots.py` to save all site specific bar plots in same parent directory instead of in subdirectories. +- Add variables to `env.template` for AHPS benchmark preprocessing. + +

+## v3.0.15.5 - 2021-04-20 - [PR #363](https://github.com/NOAA-OWP/cahaba/pull/363) + +Prevent eval_plots.py from erroring out when spatial argument enabled if certain datasets not analyzed. + +## Changes +- Add check to make sure analyzed dataset is available prior to creating spatial dataset. + +

+## v3.0.15.4 - 2021-04-20 - [PR #356](https://github.com/NOAA-OWP/cahaba/pull/356) + +Closing all multiprocessing Pool objects in repo. + +

+## v3.0.15.3 - 2021-04-19 - [PR #358](https://github.com/NOAA-OWP/cahaba/pull/358) + +Preprocess NHDPlus HR rasters for consistent projections, nodata values, and convert from cm to meters. + +## Additions +- `preprocess_rasters.py` reprojects raster, converts to meters, and updates nodata value to -9999. +- Cleaned up log messages from `bathy_rc_adjust.py` and `usgs_gage_crosswalk.py`. +- Outputs paths updated in `generate_categorical_fim_mapping.py` and `generate_categorical_fim.py`. +- `update_raster_profile` cleans up raster crs, blocksize, nodata values, and converts elevation grids from cm to meters. +- `reproject_dem.py` imports gdal to reproject elevation rasters because an error was occurring when using rasterio. + +## Changes +- `burn_in_levees.py` replaces the `gdal_calc.py` command to resolve inconsistent outputs with burned in levee values. + +
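A minimal version of the unit/nodata normalization step might look like the sketch below. The paths are placeholders, and the real `preprocess_rasters.py` / `update_raster_profile` code also handles reprojection, blocksize, and CRS cleanup.

```python
import numpy as np
import rasterio

def cm_to_meters(in_path, out_path, new_nodata=-9999.0):
    """Convert an elevation grid from centimeters to meters and reset nodata (sketch)."""
    with rasterio.open(in_path) as src:
        data = src.read(1).astype("float32")
        invalid = ~np.isfinite(data) if src.nodata is None else data == src.nodata
        data = np.where(invalid, new_nodata, data / 100.0)
        profile = src.profile.copy()

    profile.update(dtype="float32", nodata=new_nodata, compress="lzw")
    with rasterio.open(out_path, "w", **profile) as dst:
        dst.write(data, 1)
```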

+## v3.0.15.2 - 2021-04-16 - [PR #359](https://github.com/NOAA-OWP/cahaba/pull/359) + +Hotfix to preserve desired files when production flag used in `fim_run.sh`. + +## Changes + +- Fixed production whitelisted files. + +

+## v3.0.15.1 - 2021-04-13 - [PR #355](https://github.com/NOAA-OWP/cahaba/pull/355) + +Sierra test considered all USGS gage locations to be mainstems even though many actually occurred with tributaries. This resulted in unrealistic comparisons as incorrect gages were assigned to mainstems segments. This feature branch identifies gages that are on mainstems via attribute field. + +## Changes + +- Modifies `usgs_gage_crosswalk.py` to filter out gages from the `usgs_gages.gpkg` layer such that for a "MS" run, only consider gages that contain rating curve information (via `curve` attribute) and are also mainstems gages (via `mainstems` attribute). +- Modifies `usgs_gage_crosswalk.py` to filter out gages from the `usgs_gages.gpkg` layer such that for a "FR" run, only consider gages that contain rating curve information (via `curve` attribute) and are not mainstems gages (via `mainstems` attribute). +- Modifies how mainstems segments are determined by using the `nwm_flows_ms.gpkg` as a lookup to determine if the NWM segment specified by WRDS for a gage site is a mainstems gage. + +## Additions + +- Adds a `mainstem` attribute field to `usgs_gages.gpkg` that indicates whether a gage is located on a mainstems river. +- Adds `NWM_FLOWS_MS` variable to the `.env` and `.env.template` files. +- Adds the `extent` argument specified by user when running `fim_run.sh` to `usgs_gage_crosswalk.py`. + +

+## v3.0.15.0 - 2021-04-08 - [PR #340](https://github.com/NOAA-OWP/cahaba/pull/340) + +Implementing a prototype technique to estimate the missing bathymetric component in the HAND-derived synthetic rating curves. The new Bathymetric Adjusted Rating Curve (BARC) function is built within the `fim_run.sh` workflow and will ingest bankfull geometry estimates provided by the user to modify the cross section area used in the synthetic rating curve generation. + +### Changes + - `add_crosswalk.py` outputs the stream order variables to `src_full_crosswalked.csv` and calls the new `bathy_rc_adjust.py` if bathy env variable set to True and `extent=MS`. + - `run_by_unit.sh` includes a new csv outputs for reviewing BARC calculations. + - `params_template.env` & `params_calibrated.env` contain new BARC function input variables and on/off toggle variable. + - `eval_plots.py` now includes additional AHPS eval sites in the list of "bad_sites" (flagged issues with MS flowlines). + +### Additions + - `bathy_rc_adjust.py`: + - Imports the existing synthetic rating curve table and the bankfull geometry input data (topwidth and cross section area per COMID). + - Performs new synthetic rating curve calculations with bathymetry estimation modifications. + - Flags issues with the thalweg-notch artifact. + +

+## v3.0.14.0 - 2021-04-05 - [PR #338](https://github.com/NOAA-OWP/cahaba/pull/338) + +Create tool to retrieve rating curves from USGS sites and convert to elevation (NAVD88). Intended to be used as part of the Sierra Test. + +### Changes + - Modify `usgs_gage_crosswalk.py` to: + 1) Look for the `location_id` instead of the `site_no` attribute field in the `usgs_gages.gpkg` file. + 2) Filter out gages that do not have rating curves included in the `usgs_rating_curves.csv`. + - Modify `rating_curve_comparison.py` to perform a check on the age of the user-specified `usgs_rating_curves.csv`, alert the user to the age of the file, and recommend updating if the file is older than 30 days. + +### Additions + - Add `rating_curve_get_usgs_curves.py`. This script will generate the following files: + 1) `usgs_rating_curves.csv`: A csv file that contains rating curves (including conversion to NAVD88 elevation) for USGS gages in a format that is compatible with `rating_curve_comparisons.py`. As it is currently configured, only gages within CONUS will have rating curve data. + 2) `log.csv`: A log file that records the status for each gage and includes error messages. + 3) `usgs_gages.gpkg`: A geospatial layer (in FIM projection) of all active USGS gages that meet predefined criteria. Additionally, the `curve` attribute indicates whether a rating curve is found in the `usgs_rating_curves.csv`. This spatial file is only generated if the `all` option is passed with the `-l` argument. + 
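At its simplest, converting a USGS rating curve from stage to elevation adds the gage datum to each stage value. The sketch below assumes the datum is already referenced to NAVD88 and uses illustrative column names; the real `rating_curve_get_usgs_curves.py` is more involved.

```python
import pandas as pd

def stage_to_navd88(rating_curve: pd.DataFrame, datum_navd88_ft: float) -> pd.DataFrame:
    """Add elevation columns to a stage-discharge rating curve (sketch).

    Assumes stage and datum are both in feet and referenced to NAVD88.
    """
    rc = rating_curve.copy()
    rc["elevation_navd88_ft"] = rc["stage_ft"] + datum_navd88_ft
    rc["elevation_navd88_m"] = rc["elevation_navd88_ft"] * 0.3048
    return rc

rc = pd.DataFrame({"stage_ft": [1.0, 2.5, 4.0], "flow_cfs": [100, 450, 1200]})
print(stage_to_navd88(rc, datum_navd88_ft=612.3))
```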

+## v3.0.13.0 - 2021-04-01 - [PR #332](https://github.com/NOAA-OWP/cahaba/pull/332) + +Created tool to compare synthetic rating curve with benchmark rating curve (Sierra Test). + +### Changes + - Update `aggregate_fim_outputs.py` call argument in `fim_run.sh` from 4 jobs to 6 jobs, to optimize API performance. + - Reroutes median elevation data from `add_crosswalk.py` and `rem.py` to new file (depreciating `hand_ref_elev_table.csv`). + - Adds new files to `viz_whitelist` in `output_cleanup.py`. + +### Additions + - `usgs_gage_crosswalk.py`: generates `usgs_elev_table.csv` in `run_by_unit.py` with elevation and additional attributes at USGS gages. + - `rating_curve_comparison.py`: post-processing script to plot and calculate metrics between synthetic rating curves and USGS rating curve data. + +

+## v3.0.12.1 - 2021-03-31 - [PR #336](https://github.com/NOAA-OWP/cahaba/pull/336) + +Fix spatial option in `eval_plots.py` when creating plots and spatial outputs. + +### Changes + - Removes file dependencies from spatial option. Does require the WBD layer which should be specified in `.env` file. + - Produces outputs in a format consistent with requirements needed for publishing. + - Preserves leading zeros in huc information for all outputs from `eval_plots.py`. + +### Additions + - Creates `fim_performance_points.shp`: this layer consists of all evaluated ahps points (with metrics). Spatial data retrieved from WRDS on the fly. + - Creates `fim_performance_polys.shp`: this layer consists of all evaluated huc8s (with metrics). Spatial data retrieved from WBD layer. + +

+## v3.0.12.0 - 2021-03-26 - [PR #327](https://github.com/NOAA-OWP/cahaba/pull/237) + +Add more detail/information to plotting capabilities. + +### Changes + - Merge `plot_functions.py` into `eval_plots.py` and move `eval_plots.py` into the tools directory. + - Remove `plots` subdirectory. + +### Additions + - Optional argument to create barplots of CSI for each individual site. + - Create a csv containing the data used to create the scatterplots. + +

+## v3.0.11.0 - 2021-03-22 - [PR #319](https://github.com/NOAA-OWP/cahaba/pull/298) + +Improvements to CatFIM service source data generation. + +### Changes + - Renamed `generate_categorical_fim.py` to `generate_categorical_fim_mapping.py`. + - Updated the status outputs of the `nws_lid_sites` layer and saved it in the same directory as the merged `catfim_library` layer. + - Additional stability fixes (such as improved compatibility with WRDS updates). + +### Additions + - Added `generate_categorical_fim.py` to wrap `generate_categorical_fim_flows.py` and `generate_categorical_fim_mapping.py`. + - Create new `nws_lid_sites` shapefile located in the same directory as the `catfim_library` shapefile. + 

+## v3.0.10.1 - 2021-03-24 - [PR #320](https://github.com/NOAA-OWP/cahaba/pull/320) + +Patch to synthesize_test_cases.py. + +### Changes + - Bug fix to `synthesize_test_cases.py` to allow comparison between `testing` version and `official` versions. + +

+## v3.0.10.0 - 2021-03-12 - [PR #298](https://github.com/NOAA-OWP/cahaba/pull/298) + +Preprocessing of flow files for Categorical FIM. + +### Additions + - Generate Categorical FIM flow files for each category (action, minor, moderate, major). + - Generate point shapefile of Categorical FIM sites. + - Generate csv of attribute data in shapefile. + - Aggregate all shapefiles and csv files into one file in parent directory. + - Add flood of record category. + + ### Changes + - Stability fixes to `generate_categorical_fim.py`. + +
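A sketch of writing one flow file per category is shown below. The column names and output layout are assumptions for illustration, not the actual behavior of the CatFIM preprocessing scripts.

```python
import os
import pandas as pd

CATEGORIES = ["action", "minor", "moderate", "major", "record"]

def write_catfim_flow_files(site_flows: pd.DataFrame, out_dir: str) -> None:
    """Write one NWM-style flow file per flood category (sketch).

    Assumes site_flows has columns: feature_id, category, discharge_cms.
    """
    os.makedirs(out_dir, exist_ok=True)
    for category in CATEGORIES:
        subset = site_flows.loc[site_flows["category"] == category,
                                ["feature_id", "discharge_cms"]]
        subset = subset.rename(columns={"discharge_cms": "discharge"})
        subset.to_csv(os.path.join(out_dir, f"{category}_flows.csv"), index=False)
```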

+## v3.0.9.0 - 2021-03-12 - [PR #297](https://github.com/NOAA-OWP/cahaba/pull/297) + +Enhancements to FIM API. + +### Changes + - `fim_run.sh` can now be run with jobs in parallel. + - Viz post-processing can now be selected in API interface. + - Jobs table shows jobs that end with errors. + - HUC preset lists can now be selected in interface. + - Better `output_handler` file writing. + - Overall better restart and retry handlers for networking problems. + - Jobs can now be canceled in API interface. + - Both FR and MS configs can be selected for a single job. + +

+## v3.0.8.2 - 2021-03-11 - [PR #296](https://github.com/NOAA-OWP/cahaba/pull/296) + +Enhancements to post-processing for Viz-related use-cases. + +### Changes + - Aggregate grids are projected to Web Mercator during `-v` runs in `fim_run.sh`. + - HUC6 aggregation is parallelized. + - Aggregate grid blocksize is changed from 256 to 1024 for faster postprocessing. + +

+## v3.0.8.1 - 2021-03-10 - [PR #302](https://github.com/NOAA-OWP/cahaba/pull/302) + +Patched import issue in `tools_shared_functions.py`. + +### Changes + - Changed `utils.` to `tools_` in `tools_shared_functions.py` after recent structural change to `tools` directory. + +

+## v3.0.8.0 - 2021-03-09 - [PR #279](https://github.com/NOAA-OWP/cahaba/pull/279) + +Refactored NWS Flood Categorical HAND FIM (CatFIM) pipeline to open source. + +### Changes + - Added `VIZ_PROJECTION` to `shared_variables.py`. + - Added missing library referenced in `inundation.py`. + - Cleaned up and converted evaluation scripts in `generate_categorical_fim.py` to open source. + - Removed `util` folders under `tools` directory. + +

+## v3.0.7.1 - 2021-03-02 - [PR #290](https://github.com/NOAA-OWP/cahaba/pull/290) + +Renamed benchmark layers in `test_cases` and updated variable names in evaluation scripts. + +### Changes + - Updated `run_test_case.py` with new benchmark layer names. + - Updated `run_test_case_calibration.py` with new benchmark layer names. + +

+## v3.0.7.0 - 2021-03-01 - [PR #288](https://github.com/NOAA-OWP/cahaba/pull/288) + +Restructured the repository. This has no impact on hydrological work done in the codebase and is simply moving files and renaming directories. + +### Changes + - Moved the contents of the `lib` folder to a new folder called `src`. + - Moved the contents of the `tests` folder to the `tools` folder. + - Changed any instance of `lib` or `libDir` to `src` or `srcDir`. + +

+## v3.0.6.0 - 2021-02-25 - [PR #276](https://github.com/NOAA-OWP/cahaba/pull/276) + +Enhancement that creates metric plots and summary statistics using metrics compiled by `synthesize_test_cases.py`. + +### Additions + - Added `eval_plots.py`, which produces: + - Boxplots of CSI, FAR, and POD/TPR + - Barplot of aggregated CSI scores + - Scatterplot of CSI comparing two FIM versions + - CSV of aggregated statistics (CSI, FAR, POD/TPR) + - CSV of analyzed data and analyzed sites + +

+## v3.0.5.3 - 2021-02-23 - [PR #275](https://github.com/NOAA-OWP/cahaba/pull/275) + +Bug fixes to new evaluation code. + +### Changes + + - Fixed a bug in `synthesize_test_cases.py` where the extent (MS/FR) was not being written to merged metrics file properly. + - Fixed a bug in `synthesize_test_cases.py` where only BLE test cases were being written to merged metrics file. + - Removed unused imports from `inundation.py`. + - Updated README.md + +

+## v3.0.5.2 - 2021-02-23 - [PR #272](https://github.com/NOAA-OWP/cahaba/pull/272) + +Adds HAND synthetic rating curve (SRC) datum elevation values to `hydroTable.csv` output. + +### Changes + + - Updated `add_crosswalk.py` to included "Median_Thal_Elev_m" variable outputs in `hydroTable.csv`. + - Renamed hydroid attribute in `rem.py` to "Median" in case we want to include other statistics in the future (e.g. min, max, range etc.). + +

+## v3.0.5.1 - 2021-02-22 + +Fixed `TEST_CASES_DIR` path in `tests/utils/shared_variables.py`. + +### Changes + + - Removed `"_new"` from `TEST_CASES_DIR` variable. + +

+## v3.0.5.0 - 2021-02-22 - [PR #267](https://github.com/NOAA-OWP/cahaba/pull/267) + +Enhancements to allow for evaluation at AHPS sites, the generation of a query-optimized metrics CSV, and the generation of categorical FIM. This merge requires that the `/test_cases` directory be updated for all machines performing evaluation. + +### Additions + + - `generate_categorical_fim.py` was added to allow production of NWS Flood Categorical HAND FIM (CatFIM) source data. More changes on this script are to follow in subsequent branches. + +### Removals + + - `ble_autoeval.sh` and `all_ble_stats_comparison.py` were deleted because `synthesize_test_cases.py` now handles the merging of metrics. + - The code block in `run_test_case.py` that was responsible for printing the colored metrics to screen has been commented out because of the new scale of evaluations (formerly in `run_test_case.py`, now in `shared_functions.py`) + - Remove unused imports from inundation wrappers in `/tools`. + +### Changes + + - Updated `synthesize_test_cases.py` to allow for AHPS site evaluations. + - Reorganized `run_test_case.py` by moving more functions into `shared_functions.py`. + - Created more shared variables in `shared_variables.py` and updated import statements in relevant scripts. + +

+ +## v3.0.4.4 - 2021-02-19 - [PR #266](https://github.com/NOAA-OWP/cahaba/pull/266) + +Rating curves for short stream segments are replaced with rating curves from upstream/downstream segments. + +### Changes + + - Short stream segments are identified and are reassigned the channel geometry from upstream/downstream segment. + - `fossid` renamed to `fimid` and the attribute's starting value is now 1000 to avoid HydroIDs with leading zeroes. + - Addresses issue where HydroIDs were not included in final hydrotable. + - Added `import sys` to `inundation.py` (missing from previous feature branch). + - Variable names and general workflow are cleaned up. + +

+## v3.0.4.3 - 2021-02-12 - [PR #254](https://github.com/NOAA-OWP/cahaba/pull/254) + +Modified `rem.py` with a new function to output the HAND reference elevation. + +### Changes + + - Function `make_catchment_hydroid_dict` creates a df of pixel catchment ids and overlapping hydroids. + - Merge hydroid df and thalweg minimum elevation df. + - Produces a new output containing all catchment ids and the min thalweg elevation value, named `hand_ref_elev_table.csv`. + - Overwrites the `demDerived_reaches_split.gpkg` layer by adding an additional attribute `Min_Thal_Elev_meters` to view the elevation value for each hydroid. + 

+## v3.0.4.2 - 2021-02-12 - [PR #255](https://github.com/NOAA-OWP/cahaba/pull/255) + +Addresses an issue when running at the HUC6 scale. + +### Changes + + - `src.json` should be fixed and slightly smaller by removing whitespace. + - Rasters are about the same size as running fim as huc6 (compressed and tiled; aggregated are slightly larger). + - Naming convention and feature id attribute are only added to the aggregated hucs. + - HydroIDs are different for huc6 vs aggregated huc8s, mostly due to the forced split at huc boundaries (so long as we use a consistent workflow it shouldn't matter). + - Fixed a known issue where an incoming stream is sometimes not included in the final selection, which will affect aggregate outputs. + 

+## v3.0.4.1 - 2021-02-12 - [PR #261](https://github.com/NOAA-OWP/cahaba/pull/261) + +Updated MS Crosswalk method to address gaps in FIM. + +### Changes + + - Fixed typo in stream midpoint calculation in `split_flows.py` and `add_crosswalk.py`. + - `add_crosswalk.py` now restricts the MS crosswalk to NWM MS catchments. + - `add_crosswalk.py` now performs a secondary MS crosswalk selection by nearest NWM MS catchment. + +

+## v3.0.4.0 - 2021-02-10 - [PR #256](https://github.com/NOAA-OWP/cahaba/pull/256) + +New python script "wrappers" for using `inundation.py`. + +### Additions + + - Created `inundation_wrapper_nwm_flows.py` to produce inundation outputs using NWM recurrence flows: 1.5 year, 5 year, 10 year. + - Created `inundation_wrapper_custom_flow.py` to produce inundation outputs with user-created flow file. + - Created new `tools` parent directory to store `inundation_wrapper_nwm_flows.py` and `inundation_wrapper_custom_flow.py`. + +

+## v3.0.3.1 - 2021-02-04 - [PR #253](https://github.com/NOAA-OWP/cahaba/pull/253) + +Bug fixes to correct mismatched variable name and file path. + +### Changes + + - Corrected variable name in `fim_run.sh`. + - `acquire_and_preprocess_inputs.py` now creates `huc_lists` folder and updates file path. + +

+## v3.0.3.0 - 2021-02-04 - [PR #227](https://github.com/NOAA-OWP/cahaba/pull/227) + +Post-process to aggregate FIM outputs to HUC6 scale. + +### Additions + + - Viz outputs aggregated to HUC6 scale; saves outputs to `aggregate_fim_outputs` folder. + +### Changes + + - `split_flows.py` now splits streams at HUC8 boundaries to ensure consistent catchment boundaries along edges. + - `aggregate_fim_outputs.sh` has been deprecated but remains in the repo for potential FIM 4 development. + - Replaced geopandas driver arg with getDriver throughout the repo. + - Organized parameters in environment files by group. + - Cleaned up variable names in `split_flows.py` and `build_stream_traversal.py`. + - `build_stream_traversal.py` is now assigning HydroID by midpoint instead of centroid. + - Cleanup of `clip_vectors_to_wbd.py`. + 

+## v3.0.2.0 - 2021-01-25 - [PR #218](https://github.com/NOAA-OWP/cahaba/pull/218) + +Addition of an API service to schedule, run and manage `fim_run` jobs through a user-friendly web interface. + +### Additions + + - `api` folder that contains all the codebase for the new service. + +

+## v3.0.1.0 - 2021-01-21 - [PR #206](https://github.com/NOAA-OWP/cahaba/pull/206) + +Preprocess MS and FR stream networks. + +### Changes + + - Headwater stream segment geometries are adjusted to align with NWM streams. + - Incoming streams are selected using intersection points between NWM streams and HUC4 boundaries. + - `clip_vectors_to_wbd.py` handles local headwaters. + - Removes NHDPlus features categorized as coastline and underground conduit. + - Added streams layer to production whitelist. + - Fixed progress bar in `lib/acquire_and_preprocess_inputs.py`. + - Added `getDriver` to shared `functions.py`. + - Cleaned up variable names and types. + 

+## v3.0.0.4 - 2021-01-20 - [PR #230](https://github.com/NOAA-OWP/cahaba/pull/230) + +Changed the directory where the `included_huc*.lst` files are being read from. + +### Changes + + - Changed the directory where the `included_huc*.lst` files are being read from. + +

+## v3.0.0.3 - 2021-01-14 - [PR #210](https://github.com/NOAA-OWP/cahaba/pull/210) + +Hotfix for handling nodata value in rasterized levee lines. + +### Changes + + - Resolves bug for HUCs where `$ndv > 0` (Great Lakes region). + - Initialize the `nld_rasterized_elev.tif` using a value of `-9999` instead of `$ndv`. + +

+## v3.0.0.2 - 2021-01-06 - [PR #200](https://github.com/NOAA-OWP/cahaba/pull/200) + +Patch to address AHPSs mapping errors. + +### Changes + + - Checks `dtype` of `hydroTable.csv` columns to resolve errors caused in `inundation.py` when joining to flow forecast. + - Exits `inundation.py` when all hydrotable HydroIDs are lake features. + - Updates path to latest AHPs site layer. + - Updated [readme](https://github.com/NOAA-OWP/cahaba/commit/9bffb885f32dfcd95978c7ccd2639f9df56ff829) + +

+## v3.0.0.1 - 2020-12-31 - [PR #184](https://github.com/NOAA-OWP/cahaba/pull/184) + +Modifications to build and run Docker image more reliably. Cleanup on some pre-processing scripts. + +### Changes + + - Changed to noninteractive install of GRASS. + - Changed some paths from relative to absolute and cleaned up some python shebang lines. + +### Notes + - `aggregate_vector_inputs.py` doesn't work yet. Need to externally download required data to run fim_run.sh + +

+## v3.0.0.0 - 2020-12-22 - [PR #181](https://github.com/NOAA-OWP/cahaba/pull/181) + +The software released here builds on the flood inundation mapping capabilities demonstrated as part of the National Flood Interoperability Experiment, the Office of Water Prediction's Innovators Program and the National Water Center Summer Institute. The flood inundation mapping software implements the Height Above Nearest Drainage (HAND) algorithm and incorporates community feedback and lessons learned over several years. The software has been designed to meet the requirements set by stakeholders interested in flood prediction and has been developed in partnership with several entities across the water enterprise. diff --git a/INSTALL.md b/docs/INSTALL.md similarity index 100% rename from INSTALL.md rename to docs/INSTALL.md diff --git a/SECURITY.md b/docs/SECURITY.md similarity index 100% rename from SECURITY.md rename to docs/SECURITY.md diff --git a/TERMS.md b/docs/TERMS.md similarity index 100% rename from TERMS.md rename to docs/TERMS.md diff --git a/fim_run.sh b/fim_run.sh index fb22bc8c2..6d4d58a7a 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -21,6 +21,7 @@ usage () echo ' -w/--whitelist : list of files to save in a production run in addition to final inundation outputs' echo ' ex: file1.tif,file2.json,file3.csv' echo ' -v/--viz : compute post-processing on outputs to be used in viz' + echo ' -m/--mem : enable memory profiling' exit } @@ -69,6 +70,9 @@ in -v|--viz) viz=1 ;; + -m|--mem) + mem=1 + ;; *) ;; esac shift @@ -94,11 +98,14 @@ fi ## SOURCE ENV FILE AND FUNCTIONS ## source $envFile -source $libDir/bash_functions.env +source $srcDir/bash_functions.env # default values if [ "$jobLimit" = "" ] ; then - jobLimit=$defaultMaxJobs + jobLimit=$default_max_jobs +fi +if [ "$viz" = "" ] ; then + viz=0 fi ## Define Outputs Data Dir & Log File## @@ -107,18 +114,19 @@ export extent=$extent export production=$production export whitelist=$whitelist export viz=$viz +export mem=$mem logFile=$outputRunDataDir/logs/summary.log ## Define inputs export input_WBD_gdb=$inputDataDir/wbd/WBD_National.gpkg -export input_NWM_Lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg -export input_NWM_Catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg -export input_NWM_Flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg -export input_NWM_Headwaters=$inputDataDir/nwm_hydrofabric/nwm_headwaters.gpkg -export input_NHD_Flowlines=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_wVAA.gpkg - +export input_nwm_lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg +export input_nwm_catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg +export input_nwm_flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg +export input_nhd_flowlines=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_streams_adj.gpkg +export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adj.gpkg +export input_GL_boundaries=$inputDataDir/landsea/gl_water_polygons.gpkg ## Input handling ## -$libDir/check_huc_inputs.py -u "$hucList" +$srcDir/check_huc_inputs.py -u "$hucList" ## Make output and data directories ## if [ -d "$outputRunDataDir" ] && [ "$overwrite" -eq 1 ]; then @@ -132,17 +140,42 @@ mkdir -p $outputRunDataDir/logs ## RUN ## if [ -f "$hucList" ]; then if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit --joblog $logFile -- $libDir/time_and_tee_run_by_unit.sh :::: $hucList + parallel --verbose --lb -j $jobLimit --joblog $logFile -- $srcDir/time_and_tee_run_by_unit.sh :::: $hucList else - 
parallel --eta -j $jobLimit --joblog $logFile -- $libDir/time_and_tee_run_by_unit.sh :::: $hucList + parallel --eta -j $jobLimit --joblog $logFile -- $srcDir/time_and_tee_run_by_unit.sh :::: $hucList fi else if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit --joblog $logFile -- $libDir/time_and_tee_run_by_unit.sh ::: $hucList + parallel --verbose --lb -j $jobLimit --joblog $logFile -- $srcDir/time_and_tee_run_by_unit.sh ::: $hucList else - parallel --eta -j $jobLimit --joblog $logFile -- $libDir/time_and_tee_run_by_unit.sh ::: $hucList + parallel --eta -j $jobLimit --joblog $logFile -- $srcDir/time_and_tee_run_by_unit.sh ::: $hucList fi fi -# aggregate outputs -bash /foss_fim/lib/aggregate_fim_outputs.sh $outputRunDataDir +# identify missing HUCs +# time python3 /foss_fim/tools/fim_completion_check.py -i $hucList -o $outputRunDataDir +if [ "$extent" = "MS" ] && [ "$bathy_src_toggle" = "True" ]; then + # Run BARC routine + echo -e $startDiv"Performing Bathy Adjusted Rating Curve routine"$stopDiv + time python3 /foss_fim/src/bathy_src_adjust_topwidth.py -fim_dir $outputRunDataDir -bfull_geom $bankfull_input_table -j $jobLimit -plots $src_plot_option +else + echo -e $startDiv"SKIPPING Bathy Adjusted Rating Curve routine"$stopDiv +fi + +echo -e $startDiv"Estimating bankfull stage in SRCs"$stopDiv +if [ "$src_bankfull_toggle" = "True" ]; then + # Run BARC routine + time python3 /foss_fim/src/identify_src_bankfull.py -fim_dir $outputRunDataDir -flows $bankfull_flows_file -j $jobLimit -plots $src_bankfull_plot_option +fi + +echo -e $startDiv"Applying variable roughness in SRCs"$stopDiv +if [ "$src_vrough_toggle" = "True" ]; then + # Run BARC routine + time python3 /foss_fim/src/vary_mannings_n_composite.py -fim_dir $outputRunDataDir -mann $vmann_input_file -bc $bankfull_attribute -suff $vrough_suffix -j $jobLimit -plots $src_vrough_plot_option -viz_clean $viz +fi + +echo "$viz" +if [[ "$viz" -eq 1 ]]; then + # aggregate outputs + time python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir -j 6 +fi diff --git a/install_grass.exp b/install_grass.exp deleted file mode 100755 index e1320ffb9..000000000 --- a/install_grass.exp +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/expect -f -# -# This Expect script was generated by autoexpect on Thu Oct 22 20:27:42 2020 -# Expect and autoexpect were both written by Don Libes, NIST. -# -# Note that autoexpect does not guarantee a working script. It -# necessarily has to guess about certain things. Two reasons a script -# might fail are: -# -# 1) timing - A surprising number of programs (rn, ksh, zsh, telnet, -# etc.) and devices discard or ignore keystrokes that arrive "too -# quickly" after prompts. If you find your new script hanging up at -# one spot, try adding a short sleep just before the previous send. -# Setting "force_conservative" to 1 (see below) makes Expect do this -# automatically - pausing briefly before sending each character. This -# pacifies every program I know of. The -c flag makes the script do -# this in the first place. The -C flag allows you to define a -# character to toggle this mode off and on. - -set force_conservative 0 ;# set to 1 to force conservative mode even if - ;# script wasn't run conservatively originally -if {$force_conservative} { - set send_slow {1 .1} - proc send {ignore arg} { - sleep .1 - exp_send -s -- $arg - } -} - -# -# 2) differing output - Some programs produce different output each time -# they run. The "date" command is an obvious example. 
Another is -# ftp, if it produces throughput statistics at the end of a file -# transfer. If this causes a problem, delete these patterns or replace -# them with wildcards. An alternative is to use the -p flag (for -# "prompt") which makes Expect only look for the last line of output -# (i.e., the prompt). The -P flag allows you to define a character to -# toggle this mode off and on. -# -# Read the man page for more info. -# -# -Don - - -set timeout -1 -spawn apt install -y grass=7.8.2-1build3 grass-doc=7.8.2-1build3 -match_max 100000 -expect -exact "Country of origin for the keyboard: " -send -- "31\r" -expect -exact "Keyboard layout: " -send -- "1\r" -expect eof diff --git a/lib/aggregate_vector_inputs.py b/lib/aggregate_vector_inputs.py deleted file mode 100755 index 6d9b2abc9..000000000 --- a/lib/aggregate_vector_inputs.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env·python3 - -import os -import geopandas as gpd -import pandas as pd -from os.path import splitext -from utils.shared_variables import PREP_PROJECTION -from derive_headwaters import findHeadWaterPoints -from tqdm import tqdm - -in_dir ='data/inputs/nhdplus_vectors' -nhd_dir ='data/inputs/nhdplus_vectors_aggregate' -nwm_dir = 'data/inputs/nwm_hydrofabric' - -## NWM Headwaters -print ('deriving NWM headwater points') -nwm_streams = gpd.read_file(os.path.join(nwm_dir,'nwm_flows.gpkg')) -nwm_headwaters = findHeadWaterPoints(nwm_streams) -nwm_headwaters.to_file(os.path.join(nwm_dir,'nwm_headwaters.gpkg'),driver='GPKG',index=False) - -## NHDPlus HR -print ('aggregating NHDPlus HR burnline layers') -nhd_streams_wVAA_fileName_pre=os.path.join(nhd_dir,'NHDPlusBurnLineEvent_wVAA.gpkg') - -schema = {'geometry': 'MultiLineString','properties': {'NHDPlusID': 'str','ReachCode': 'str', - 'FromNode': 'str','ToNode': 'str', - 'StreamOrde': 'str','DnLevelPat': 'str', - 'LevelPathI': 'str'}} - -for huc in tqdm(os.listdir(in_dir)): - if not huc[0]=='#': - burnline_filename = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg') - vaa_filename = os.path.join(in_dir,huc,'NHDPlusFlowLineVAA' + str(huc) + '.gpkg') - flowline_filename = os.path.join(in_dir,huc,'NHDFlowline' + str(huc) + '.gpkg') - if os.path.exists(os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg')): - burnline = gpd.read_file(burnline_filename) - nhd_streams_vaa = gpd.read_file(vaa_filename) - flowline = gpd.read_file(flowline_filename) - burnline = burnline[['NHDPlusID','ReachCode','geometry']] - flowline = flowline[['NHDPlusID','FCode']] - nhd_streams_vaa = nhd_streams_vaa[['FromNode','ToNode','NHDPlusID','StreamOrde','DnLevelPat','LevelPathI']] - nhd_streams_withVAA = burnline.merge(nhd_streams_vaa,on='NHDPlusID',how='inner') - nhd_streams_fcode = nhd_streams_withVAA.merge(flowline,on='NHDPlusID',how='inner') - nhd_streams = nhd_streams_fcode.to_crs(PREP_PROJECTION) - if os.path.isfile(nhd_streams_wVAA_fileName_pre): - nhd_streams.to_file(nhd_streams_wVAA_fileName_pre,driver='GPKG',index=False, mode='a') - else: - nhd_streams.to_file(nhd_streams_wVAA_fileName_pre,driver='GPKG',index=False) - else: - print ('missing data for huc ' + str(huc)) - else: - print ('skipping huc ' + str(huc)) diff --git a/lib/filter_catchments_and_add_attributes.py b/lib/filter_catchments_and_add_attributes.py deleted file mode 100755 index ad9e6e543..000000000 --- a/lib/filter_catchments_and_add_attributes.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 - -import geopandas as gpd -import pandas as pd -import numpy as np -import argparse -import sys - 
-input_catchments_fileName = sys.argv[1] -input_flows_fileName = sys.argv[2] -output_catchments_fileName = sys.argv[3] -output_flows_fileName = sys.argv[4] -wbd_fileName = sys.argv[5] -hucCode = str(sys.argv[6]) - -input_catchments = gpd.read_file(input_catchments_fileName) -wbd = gpd.read_file(wbd_fileName) -input_flows = gpd.read_file(input_flows_fileName) -# must drop leading zeroes -select_flows = tuple(map(str,map(int,wbd[wbd.HUC8.str.contains(hucCode)].fossid))) - -if input_flows.HydroID.dtype != 'str': input_flows.HydroID = input_flows.HydroID.astype(str) -output_flows = input_flows[input_flows.HydroID.str.startswith(select_flows)].copy() -if output_flows.HydroID.dtype != 'int': output_flows.HydroID = output_flows.HydroID.astype(int) - -if len(output_flows) > 0: - # merges input flows attributes and filters hydroids - if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) - output_catchments = input_catchments.merge(output_flows.drop(['geometry'],axis=1),on='HydroID') - - # filter out smaller duplicate features - duplicateFeatures = np.where(np.bincount(output_catchments['HydroID'])>1)[0] - # print(duplicateFeatures) - - for dp in duplicateFeatures: - # print(dp) - indices_of_duplicate = np.where(output_catchments['HydroID'] == dp)[0] - # print(indices_of_duplicate) - areas = output_catchments.iloc[indices_of_duplicate,:].geometry.area - # print(areas) - indices_of_smaller_duplicates = indices_of_duplicate[np.where(areas != np.amax(areas))[0]] - # print(indices_of_smaller_duplicates) - output_catchments = output_catchments.drop(output_catchments.index[indices_of_smaller_duplicates]) - - # add geometry column - output_catchments['areasqkm'] = output_catchments.geometry.area/(1000**2) - - output_catchments.to_file(output_catchments_fileName, driver="GPKG",index=False) - output_flows.to_file(output_flows_fileName, driver="GPKG", index=False) diff --git a/lib/fr_to_ms_raster_mask.py b/lib/fr_to_ms_raster_mask.py deleted file mode 100755 index 3f4d6e9b2..000000000 --- a/lib/fr_to_ms_raster_mask.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python3 - -''' - Description: Mask raster layers using 'mainstems' stream buffer -''' - -import sys -import os -import geopandas as gpd -import rasterio.mask - -split_flows_fileName = sys.argv[1] -fdrFR = sys.argv[2] -nhddemFR = sys.argv[3] -slpFR = sys.argv[4] -fdrMSname = sys.argv[5] -nhddemMSname = sys.argv[6] -slpMSname = sys.argv[7] -strpixelFR = sys.argv[8] -strpixelMSname = sys.argv[9] -floodAOIbuf = sys.argv[10] - -# create output layer names -split_flows = gpd.read_file(split_flows_fileName) - -# Limit the rasters to the buffer distance around the draft streams. 
-print ("Limiting rasters to buffer area ({} meters) around model streams".format(str(floodAOIbuf))) - -MSsplit_flows_gdf_buffered = split_flows.unary_union.buffer(int(floodAOIbuf)) - -print('Writing raster outputs ...') - -# Mask nhddem -with rasterio.open(nhddemFR) as src: - out_image, out_transform = rasterio.mask.mask(src, [MSsplit_flows_gdf_buffered], crop=True) - out_meta = src.meta - -out_meta.update({"driver": "GTiff", - "height": out_image.shape[1], - "width": out_image.shape[2], - "transform": out_transform}) - -with rasterio.open(os.path.join(os.path.dirname(nhddemFR), nhddemMSname), "w", **out_meta) as dest: - dest.write(out_image) - -# Mask fdr -with rasterio.open(fdrFR) as src: - out_image, out_transform = rasterio.mask.mask(src, [MSsplit_flows_gdf_buffered], crop=True) - out_meta = src.meta - -out_meta.update({"driver": "GTiff", - "height": out_image.shape[1], - "width": out_image.shape[2], - "transform": out_transform}) - -with rasterio.open(os.path.join(os.path.dirname(fdrFR), fdrMSname), "w", **out_meta) as dest: - dest.write(out_image) - -# Mask slope -with rasterio.open(slpFR) as src: - out_image, out_transform = rasterio.mask.mask(src, [MSsplit_flows_gdf_buffered], crop=True) - out_meta = src.meta - -out_meta.update({"driver": "GTiff", - "height": out_image.shape[1], - "width": out_image.shape[2], - "transform": out_transform}) - -with rasterio.open(os.path.join(os.path.dirname(slpFR), slpMSname), "w", **out_meta) as dest: - dest.write(out_image) - -# Mask stream pixels -with rasterio.open(strpixelFR) as src: - out_image, out_transform = rasterio.mask.mask(src, [MSsplit_flows_gdf_buffered], crop=True) - out_meta = src.meta - -out_meta.update({"driver": "GTiff", - "height": out_image.shape[1], - "width": out_image.shape[2], - "transform": out_transform}) - -with rasterio.open(os.path.join(os.path.dirname(strpixelFR), strpixelMSname), "w", **out_meta) as dest: - dest.write(out_image) diff --git a/lib/make_stages_and_catchlist.py b/lib/make_stages_and_catchlist.py deleted file mode 100755 index 35627377c..000000000 --- a/lib/make_stages_and_catchlist.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python3 - -import geopandas as gpd -import pandas as pd -import numpy as np -import argparse -import sys -import decimal - -flows_fileName = sys.argv[1] -catchments_fileName = sys.argv[2] -stages_fileName = sys.argv[3] -catchlist_fileName = sys.argv[4] -stages_min = float(sys.argv[5]) -stages_interval = float(sys.argv[6]) -stages_max = float(sys.argv[7]) - -flows = gpd.read_file(flows_fileName) -catchments = gpd.read_file(catchments_fileName) - - -hydroIDs = flows['HydroID'].tolist() -len_of_hydroIDs = len(hydroIDs) -slopes = flows['S0'].tolist() -lengthkm = flows['LengthKm'].tolist() -areasqkm = catchments['areasqkm'].tolist() - - -stages_max = stages_max + stages_interval -stages = np.round(np.arange(stages_min,stages_max,stages_interval),4) - -with open(stages_fileName,'w') as f: - f.write("Stage\n") - for stage in stages: - f.write("{}\n".format(stage)) - -with open(catchlist_fileName,'w') as f: - f.write("{}\n".format(len_of_hydroIDs)) - for h,s,l,a in zip(hydroIDs,slopes,lengthkm,areasqkm): - f.write("{} {} {} {}\n".format(h,s,l,a)) diff --git a/lib/raster.py b/lib/raster.py deleted file mode 100644 index a10a02430..000000000 --- a/lib/raster.py +++ /dev/null @@ -1,462 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -from osgeo import gdal, ogr, osr -import numpy as np -from os.path import isfile -from os import remove -from copy import deepcopy -from 
subprocess import call - -class Raster: - - """ - Raster object from single band rasters - - ... - - Attributes - ---------- - array : numpy array - raster data in numpy array - gt : list - geotransform. see gdal docs for more info. - proj : str - Projection string - ndv : number - No data value - des : str - band description - ct : gdal.colorTable - color table - dt : int - GDAL GDT data type. See notes. - dim : tuple - raster dimensions (bands, rows, columns) for multi-bands and (row, columns) for single band - nbands : int - number of bands. - nrows : int - number of rows - ncols : int - number of columns - - Methods - ------- - writeRaster(fileName,dtype=None,driverName='GTiff',verbose=False) - Write out raster file as geotiff - copy() - Copy method. Uses deepcopy since array data is present - clipToVector(raster_fileName,vector_fileName,verbose=False,output_fileType='GTiff',output_fileName=None,loadOutput=True) - Clips to vector using gdalwarp command line utility - - Raises - ------ - OSError - If fileName does not exist - ValueError - Raises if input raster - - See Also - -------- - - Notes - ----- - Currently only accepts single band rasters. - - Multiple datatypes are used. The table below shows which numpy datatypes correspond to the the GDAL types and their integer codes. - - # ## Integer Code ## ## Global Descriptor Table ## ## Numpy ## - # 0 GDT_Unknown NA - # 1 GDT_Byte np.bool, np.int ,np.int8, np.long, np.byte, np.uint8 - # 2 GDT_UInt16 np.uint16, np.ushort - # 3 GDT_Int16 np.int16, np.short - # 4 GDT_UInt32 np.uint32 , np.uintc - # 5 GDT_Int32 np.int32, np.intc - # 6 GDT_Float32 np.float32, np.single - # 7 GDT_Float64 np.float64, np.double - # 8 GDT_CInt16 np.complex64 - # 9 GDT_CInt32 np.complex64 - # 10 GDT_CFloat32 np.complex64 - # 11 GDT_CFloat64 np.complex128 - # 12 GDT_TypeCount NA - - Examples - -------- - Load Raster - >>> rasterData = fldpln.Raster('path/to/raster') - - """ - - # converts numpy datatypes and gdal GDT variables to integer codes - dataTypeConversion_name_to_integer = { np.int8 : 1 , np.bool : 1 , np.int : 1 , np.long : 1 , np.byte : 1, np.uint8 : 1, - np.uint16 : 2 , np.int16 : 3 , - np.ushort : 2 , np.short : 3 , - np.uint32 : 4 , np.uintc : 4 , np.int32 : 5 , np.intc : 5 , - np.float32 : 6 , np.single : 6 , - np.float64 : 7 , np.double : 7 , - np.complex64 : 10 , np.complex128 : 11 , - 0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7,8:8,9:9,10:10,11:11,12:12 } - - # converts integer codes and gdal GDT variables to numpy datatypes - dataTypeConversion_integer_to_name = {0 : np.complex128 , 1 : np.int8 , 2 : np.uint16 , 3 : np.int16 , - 4 : np.uint32 , 5 : np.int32 , 6 : np.float32 , 7 : np.float64 , - 8 : np.complex64 , 9 : np.complex64 , 10 : np.complex64 , 11 : np.complex128 } - - - def __init__(self,fileName,loadArray=True,dtype=None): - - """ - Initializes Raster Instance from single band raster - - ... 
- - Parameters - ---------- - fileName : str - File path to single band raster - dtype : numpy datatype or int, optional - Numpy, GDT, or integer code data type used to override the data type on the file when imported to array (Default Value = None, None sets to the numpy array data type to the one in the raster file) - - Returns - ------- - raster - Instance of raster object - - """ - - if not isfile(fileName): - raise OSError("File \'{}\' does not exist".format(fileName)) - - stream = gdal.Open(fileName,gdal.GA_ReadOnly) - - self.nrows,self.ncols = stream.RasterYSize , stream.RasterXSize - self.nbands = stream.RasterCount - - if loadArray: - self.array = stream.ReadAsArray() - - self.gt = stream.GetGeoTransform() - self.proj = stream.GetProjection() - - # if self.nbands > 1: - # raise ValueError('Raster class only accepts single band rasters for now') - - band = stream.GetRasterBand(1) - - self.ndv = band.GetNoDataValue() - - # set data type - if dtype is not None: # override raster file type - - # sets dt to dtype integer code - try: - self.dt = self.dataTypeConversion_name_to_integer[dtype] - except KeyError: - raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from raster'.format(dtype)) - - # sets array data type - if isinstance(dtype,type): # if dtype is a numpy data tpe - - self.array = self.array.astype(dtype) - - else: # if dtype is an integer code of GDAL GDT variable - - try: - self.array = self.array.astype(self.dataTypeConversion_integer_to_name[dtype]) - except KeyError: - raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from raster'.format(dtype)) - - else: # sets to default data type in raster file - - self.dt = band.DataType - - try: - self.array.astype(self.dataTypeConversion_integer_to_name[self.dt]) - except KeyError: - raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from raster'.format(self.dt)) - - try: - self.des = band.GetDescription() - except AttributeError: - pass - - try: - self.ct = stream.GetRasterColorTable() - except AttributeError: - pass - - # self.dim = self.array.shape - self.fileName = fileName - - stream,band = None,None - - - @property - def dim(self): - """ Property method for number of dimensions """ - - if self.nbands == 1: - DIMS = self.nrows,self.ncols - if self.nbands > 1: - DIMS = self.nbands,self.nrows,self.ncols - - return(DIMS) - - - def copy(self): - """ Copy method. Uses deepcopy since array data is present """ - return(deepcopy(self)) - - - def writeRaster(self,fileName,dtype=None,driverName='GTiff',verbose=False): - - """ - Write out raster file as geotiff - - Parameters - ---------- - fileName : str - File path to output raster to - dtype : numpy datatype or int, optional - Numpy, GDT, or integer code data type (Default Value = self.dt attribute value, otherwise uses data type from the numpy array) - driverName : str, optional - GDAL driver type. See gdal docs for more details. Only tested for GTiff. (Default Value = 'GTiff') - verbose : Boolean, optional - Verbose output (Default Value = False) - - Returns - ------- - None - - Raises - ------ - ValueError - Raises ValueError when the data type parameter is not recognized. See the help docs for raster class to see which numpy, gdal, or encoded values are accepted. 
- - Examples - -------- - Write Geotiff raster - >>> rasterData = fldpln.Raster('path/to/raster') - >>> rasterData.writeRaster('/different/path/to/raster',dtype=np.int8) - - """ - - driver = gdal.GetDriverByName(driverName) - - if dtype is None: - try: - dtype = self.dt - except AttributeError: - # dtype = gdal.GDT_Float64 - try: - dtype = self.dataTypeConversion_name_to_integer[self.array.dtype] - except KeyError: - raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from numpy array'.format(self.array.dtype)) - else: - try: - dtype = self.dataTypeConversion_name_to_integer[dtype] - except KeyError: - raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from numpy array'.format(self.array.dtype)) - - dataset = driver.Create(fileName, self.ncols, self.nrows, 1, dtype) - dataset.SetGeoTransform(self.gt) - dataset.SetProjection(self.proj) - band = dataset.GetRasterBand(1) - - # set color table and color interpretation - #print(band.__dict__) - try: - band.SetRasterColorTable(self.ct) - #band.SetRasterColorInterpretation(gdal.GCI_PaletteIndex) - except AttributeError: - pass - - try: - band.SetDescription(self.des) - except AttributeError: - pass - - band.SetNoDataValue(self.ndv) - band.WriteArray(self.array) - band, dataset = None,None # Close the file - - if verbose: - print("Successfully wrote out raster to {}".format(fileName)) - - def polygonize(self,vector_fileName,vector_driver,layer_name,verbose): - - gdal.UseExceptions() - - # get raster datasource - # - src_ds = gdal.Open( self.fileName ) - srcband = src_ds.GetRasterBand(1) - - # - # create output datasource - driver_ext_dict = {'ESRI Shapefile' : 'shp' , 'GPKG' : 'gpkg'} - - if vector_driver not in driver_ext_dict: - raise ValueError('Driver not found in {}'.format(driver_ext_dict)) - - drv = ogr.GetDriverByName(vector_driver) - dst_ds = drv.CreateDataSource( vector_fileName) - - srs = osr.SpatialReference() - srs.ImportFromWkt(self.proj) - - dst_layer = dst_ds.CreateLayer(layer_name, srs = srs, geom_type = ogr.wkbPolygon ) - - if verbose: - prog_func = gdal.TermProgress_nocb - else: - prog_func = None - - gdal.Polygonize( srcband, None, dst_layer, -1, ['8CONNECTED=8'], callback=prog_func ) - - @classmethod - def clipToVector(cls,raster_fileName,vector_fileName,output_fileName=None,output_fileType='GTiff',verbose=False): - """ - Clips to vector using gdalwarp command line utility - - ... 
- - Parameters - ---------- - raster_fileName : str - File path to raster to clip - vector_fileName : str - File path to vector layer to clip with - output_fileName : str - Set file path to output clipped raster (Default Value = None) - output_fileType : str - Set file type of output from GDAL drivers list (Default Value = 'GTiff') - verbose : Boolean - Verbose output (Default Value = False) - - Returns - ------- - raster : raster - Clipped raster layer - - Notes - ----- - gdalwarp utility must be installed and callable via a subprocess - - Examples - -------- - clip raster and don't return - >>> fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector','path/to/write/output/raster/to') - Clip raster and return but don't write - >>> clippedRaster = fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector') - - - """ - - # create temp output if none is desired - if output_fileName is None: - output_fileName = 'temp.tif' - - # generate command - command = ['gdalwarp','-overwrite','-of',output_fileType,'-cutline',vector_fileName,'-crop_to_cutline',raster_fileName,output_fileName] - - # insert quiet flag if not verbose - if not verbose: - command = command.insert(1,'-q') - - # call command - call(command) - - # remove temp file - if output_fileName is None: - remove(output_fileName) - - return(cls(output_fileName)) - - def getCoordinatesFromIndex(self,row,col): - """ - Returns coordinates in the rasters projection from a given multi-index - - """ - - # extract variables for readability - x_upper_limit, y_upper_limit = self.gt[0], self.gt[3] - x_resolution, y_resolution = self.gt[1], self.gt[5] - nrows, ncols = self.nrows, self.ncols - - x = x_upper_limit + (col * x_resolution) - y = y_upper_limit + (row * y_resolution) - - return(x,y) - - - def sampleFromCoordinates(self,x,y,returns='value'): - """ - Sample raster value from coordinates - ... 
- - Parameters - ---------- - raster_fileName : str - File path to raster to clip - vector_fileName : str - File path to vector layer to clip with - output_fileName : str - Set file path to output clipped raster (Default Value = None) - output_fileType : str - Set file type of output from GDAL drivers list (Default Value = 'GTiff') - verbose : Boolean - Verbose output (Default Value = False) - - Returns - ------- - raster : raster - Clipped raster layer - - Notes - ----- - gdalwarp utility must be installed and callable via a subprocess - - Examples - -------- - clip raster and don't return - >>> fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector','path/to/write/output/raster/to') - Clip raster and return but don't write - >>> clippedRaster = fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector') - - - """ - - # extract variables for readability - x_upper_limit, y_upper_limit = self.gt[0], self.gt[3] - x_resolution, y_resolution = self.gt[1], self.gt[5] - nrows, ncols = self.nrows, self.ncols - - # get upper left hand corner coordinates from the centroid coordinates of the upper left pixel - x_upper_limit = x_upper_limit - (x_resolution/2) - y_upper_limit = y_upper_limit - (y_resolution/2) - - # get indices - columnIndex = int( ( x - x_upper_limit) / x_resolution) - rowIndex = int( ( y - y_upper_limit) / y_resolution) - - # check indices lie within raster limits - columnIndexInRange = ncols > columnIndex >= 0 - rowIndexInRange = nrows > rowIndex >= 0 - - if (not columnIndexInRange) | (not rowIndexInRange): - raise ValueError("Row Index {} or column index {} not in raster range ({},{})".format(rowIndex,columnIndex,nrows,ncols)) - - # check value is not ndv - if self.array[rowIndex,columnIndex] == self.ndv: - raise ValueError("Sample value is no data at ({},{})".format(nrows,ncols)) - - # return if statements - if returns == 'value': - return(self.array[rowIndex,columnIndex]) - elif returns == 'multi-index': - return(rowIndex,columnIndex) - elif returns == 'ravel-index': - return(np.ravel_multi_index((rowIndex,columnIndex),(nrows,ncols))) - else: - raise ValueError('Enter valid returns argument') diff --git a/lib/reachID_grid_to_vector_points.py b/lib/reachID_grid_to_vector_points.py deleted file mode 100755 index c4a54b2ec..000000000 --- a/lib/reachID_grid_to_vector_points.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 - -from osgeo import gdal -import numpy as np -import osgeo.ogr -import osgeo.osr -import sys - -import cProfile -from tqdm import tqdm -import geopandas as gpd -from shapely.geometry import Point -from raster import Raster - -""" -USAGE: -./reachID_grid_to_vector_points.py - -""" - -path = sys.argv[1] -outputFileName = sys.argv[2] -writeOption = sys.argv[3] - -#r = gdal.Open(path) -#band = r.GetRasterBand(1) -boolean=Raster(path) - -#(upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = r.GetGeoTransform() -(upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = boolean.gt - -#a = band.ReadAsArray().astype(np.float) - -# indices = np.nonzero(a != band.GetNoDataValue()) -indices = np.nonzero(boolean.array >= 1) - -# Init the shapefile stuff.. 
-#srs = osgeo.osr.SpatialReference() -#srs.ImportFromWkt(r.GetProjection()) - -#driver = osgeo.ogr.GetDriverByName('GPKG') -#shapeData = driver.CreateDataSource(outputFileName) - -#layer = shapeData.CreateLayer('ogr_pts', srs, osgeo.ogr.wkbPoint) -#layerDefinition = layer.GetLayerDefn() - -#idField = osgeo.ogr.FieldDefn("id", osgeo.ogr.OFTInteger) -#layer.CreateField(idField) - -id =[None] * len(indices[0]);points = [None]*len(indices[0]) - -# Iterate over the Numpy points.. -i = 1 -for y_index,x_index in tqdm(zip(*indices),total=len(indices[0])): - x = x_index * x_size + upper_left_x + (x_size / 2) #add half the cell size - y = y_index * y_size + upper_left_y + (y_size / 2) #to centre the point - - # get raster value - #reachID = a[y_index,x_index] - - #point = osgeo.ogr.Geometry(osgeo.ogr.wkbPoint) - #point.SetPoint(0, x, y) - points[i-1] = Point(x,y) - - #feature = osgeo.ogr.Feature(layerDefinition) - #feature.SetGeometry(point) - #feature.SetFID(i) - if writeOption == 'reachID': - reachID = a[y_index,x_index] - id[i-1] = reachID - #feature.SetField("id",reachID) - elif (writeOption == 'featureID') |( writeOption == 'pixelID'): - #feature.SetField("id",i) - id[i-1] = i - #layer.CreateFeature(feature) - - i += 1 - -pointGDF = gpd.GeoDataFrame({'id' : id, 'geometry' : points},crs=boolean.proj,geometry='geometry') -pointGDF.to_file(outputFileName,driver='GPKG',index=False) - -print("Complete") -#shapeData.Destroy() - diff --git a/lib/rem.py b/lib/rem.py deleted file mode 100755 index aa7faeafa..000000000 --- a/lib/rem.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env python3 - -from numba import njit, typeof, typed, types -import rasterio -import numpy as np -import argparse -import os -from osgeo import ogr, gdal - - -def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster): - """ - Calculates REM/HAND/Detrended DEM - - Parameters - ---------- - dem_fileName : str - File name of pit filled DEM raster. - pixel_watersheds_fileName : str - File name of stream pixel watersheds raster. - rem_fileName : str - File name of output relative elevation raster. - - """ - - - # ------------------------------------------- Get catchment_min_dict --------------------------------------------------- # - # The following creates a dictionary of the catchment ids (key) and their elevation along the thalweg (value). - @njit - def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalweg_window): - - for i,cm in enumerate(flat_catchments): - if thalweg_window[i] == 1: # Only allow reference elevation to be within thalweg. - # If the catchment really exists in the dictionary, compare elevation values. - if (cm in catchment_min_dict): - if (flat_dem[i] < catchment_min_dict[cm]): - # If the flat_dem's elevation value is less than the catchment_min_dict min, update the catchment_min_dict min. - catchment_min_dict[cm] = flat_dem[i] - else: - catchment_min_dict[cm] = flat_dem[i] - return(catchment_min_dict) - - # Open the masked gw_catchments_pixels_masked and dem_thalwegCond_masked. - gw_catchments_pixels_masked_object = rasterio.open(pixel_watersheds_fileName) - dem_thalwegCond_masked_object = rasterio.open(dem_fileName) - thalweg_raster_object = rasterio.open(thalweg_raster) - - # Specify raster object metadata. 
- meta = dem_thalwegCond_masked_object.meta.copy() - meta['tiled'], meta['compress'] = True, 'lzw' - - # -- Create catchment_min_dict -- # - catchment_min_dict = typed.Dict.empty(types.int32,types.float32) # Initialize an empty dictionary to store the catchment minimums. - # Update catchment_min_dict with pixel sheds minimum. - for ji, window in dem_thalwegCond_masked_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template. - dem_window = dem_thalwegCond_masked_object.read(1,window=window).ravel() # Define dem_window. - catchments_window = gw_catchments_pixels_masked_object.read(1,window=window).ravel() # Define catchments_window. - thalweg_window = thalweg_raster_object.read(1, window=window).ravel() # Define cost_window. - - # Call numba-optimized function to update catchment_min_dict with pixel sheds minimum. - catchment_min_dict = make_catchment_min_dict(dem_window, catchment_min_dict, catchments_window, thalweg_window) - - dem_thalwegCond_masked_object.close() - gw_catchments_pixels_masked_object.close() - thalweg_raster_object.close() - # ------------------------------------------------------------------------------------------------------------------------ # - - - # ------------------------------------------- Produce relative elevation model ------------------------------------------- # - @njit - def calculate_rem(flat_dem,catchmentMinDict,flat_catchments,ndv): - - rem_window = np.zeros(len(flat_dem),dtype=np.float32) - for i,cm in enumerate(flat_catchments): - if cm in catchmentMinDict: - if catchmentMinDict[cm] == ndv: - rem_window[i] = ndv - else: - rem_window[i] = flat_dem[i] - catchmentMinDict[cm] - - return(rem_window) - - rem_rasterio_object = rasterio.open(rem_fileName,'w',**meta) # Open rem_rasterio_object for writing to rem_fileName. - pixel_catchments_rasterio_object = rasterio.open(pixel_watersheds_fileName) # Open pixel_catchments_rasterio_object - dem_rasterio_object = rasterio.open(dem_fileName) - - for ji, window in dem_rasterio_object.block_windows(1): - dem_window = dem_rasterio_object.read(1,window=window) - window_shape = dem_window.shape - - dem_window = dem_window.ravel() - catchments_window = pixel_catchments_rasterio_object.read(1,window=window).ravel() - - rem_window = calculate_rem(dem_window, catchment_min_dict, catchments_window, meta['nodata']) - rem_window = rem_window.reshape(window_shape).astype(np.float32) - - rem_rasterio_object.write(rem_window, window=window, indexes=1) - - dem_rasterio_object.close() - pixel_catchments_rasterio_object.close() - rem_rasterio_object.close() - # ------------------------------------------------------------------------------------------------------------------------ # - - -if __name__ == '__main__': - - # parse arguments - parser = argparse.ArgumentParser(description='Relative elevation from pixel based watersheds') - parser.add_argument('-d','--dem', help='DEM to use within project path', required=True) - parser.add_argument('-w','--watersheds',help='Pixel based watersheds raster to use within project path',required=True) - parser.add_argument('-t','--thalweg-raster',help='A binary raster representing the thalweg. 
1 for thalweg, 0 for non-thalweg.',required=True) - parser.add_argument('-o','--rem',help='Output REM raster',required=True) - - - # extract to dictionary - args = vars(parser.parse_args()) - - # rename variable inputs - dem_fileName = args['dem'] - pixel_watersheds_fileName = args['watersheds'] - rem_fileName = args['rem'] - thalweg_raster = args['thalweg_raster'] - - rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster) diff --git a/lib/snap_and_clip_to_nhd.py b/lib/snap_and_clip_to_nhd.py deleted file mode 100755 index 7c715e06f..000000000 --- a/lib/snap_and_clip_to_nhd.py +++ /dev/null @@ -1,298 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import geopandas as gpd -import pandas as pd -from collections import deque,Counter -import numpy as np -from tqdm import tqdm -import argparse -from os.path import splitext,isfile -from shapely.strtree import STRtree -from shapely.geometry import Point,MultiLineString,LineString,mapping,MultiPolygon,Polygon - -def subset_vector_layers(hucCode,nwm_streams_fileName,nwm_headwaters_fileName,nhd_streams_fileName,nwm_lakes_fileName,nld_lines_fileName,nwm_catchments_fileName,wbd_fileName,wbd_buffer_fileName,ahps_sites_fileName,landsea_filename,subset_nhd_streams_fileName,subset_nwm_lakes_fileName,subset_nld_lines_fileName,subset_nwm_headwaters_fileName,subset_nwm_catchments_fileName,subset_nwm_streams_fileName,subset_landsea_filename,subset_nhd_headwaters_fileName=None,dissolveLinks=False,extent='FR'): - - hucUnitLength = len(str(hucCode)) - - wbd = gpd.read_file(wbd_fileName) - wbd_buffer = gpd.read_file(wbd_buffer_fileName) - projection = wbd.crs - - # Clip ocean water polygon for future masking ocean areas (where applicable) - landsea_read = gpd.read_file(landsea_filename, mask = wbd_buffer) - landsea = gpd.clip(landsea_read, wbd_buffer) - if not landsea.empty: - landsea.to_file(subset_landsea_filename,driver=getDriver(subset_landsea_filename),index=False) - del landsea - - # find intersecting lakes and writeout - print("Subsetting NWM Lakes for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nwm_lakes = gpd.read_file(nwm_lakes_fileName, mask = wbd_buffer) - - if not nwm_lakes.empty: - # perform fill process to remove holes/islands in the NWM lake polygons - nwm_lakes = nwm_lakes.explode() - nwm_lakes_fill_holes=MultiPolygon(Polygon(p.exterior) for p in nwm_lakes['geometry']) # remove donut hole geometries - # loop through the filled polygons and insert the new geometry - for i in range(len(nwm_lakes_fill_holes)): - nwm_lakes.loc[i,'geometry'] = nwm_lakes_fill_holes[i] - nwm_lakes.to_file(subset_nwm_lakes_fileName,driver=getDriver(subset_nwm_lakes_fileName),index=False) - del nwm_lakes - - # find intersecting levee lines - print("Subsetting NLD levee lines for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nld_lines = gpd.read_file(nld_lines_fileName, mask = wbd) - if not nld_lines.empty: - nld_lines.to_file(subset_nld_lines_fileName,driver=getDriver(subset_nld_lines_fileName),index=False) - del nld_lines - - # find intersecting nwm_catchments - print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nwm_catchments = gpd.read_file(nwm_catchments_fileName, mask = wbd) - nwm_catchments.to_file(subset_nwm_catchments_fileName,driver=getDriver(subset_nwm_catchments_fileName),index=False) - del nwm_catchments - - # query nhd+HR streams for HUC code - print("Querying NHD Streams for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nhd_streams = gpd.read_file(nhd_streams_fileName, 
mask = wbd_buffer) - nhd_streams = nhd_streams.explode() - - # find intersecting nwm_headwaters - print("Subsetting NWM Streams and deriving headwaters for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nwm_streams = gpd.read_file(nwm_streams_fileName, mask = wbd_buffer) - nwm_streams.to_file(subset_nwm_streams_fileName,driver=getDriver(subset_nwm_streams_fileName),index=False) - del nwm_streams - - # get nhd headwaters closest to nwm headwater points - print('Identify NHD Headwater streams nearest to NWM Headwater points',flush=True) - nhd_streams.loc[:,'is_nwm_headwater'] = False - # nhd_streams_tree = STRtree(nhd_streams.geometry) - - if extent == 'FR': - nwm_headwaters = gpd.read_file(nwm_headwaters_fileName, mask = wbd_buffer) - elif extent == 'MS': - nwm_headwaters = gpd.read_file(ahps_sites_fileName, mask = wbd) - - # check for incoming MS streams and convert to points - intersecting = nhd_streams.crosses(wbd.geometry[0]) - incoming_flows = nhd_streams.loc[intersecting,:] - incoming_points_list = [] - - if len(incoming_flows) > 0: - for i,linesting in enumerate(incoming_flows.geometry): - incoming_points_list = incoming_points_list + [linesting.coords[-1]] - - geometry = [Point(xy) for xy in zip(incoming_points_list)] - incoming_points = gpd.GeoDataFrame({'feature_id' : 0 ,'nwsid' : 'huc8_incoming' ,'geometry':geometry}, crs=nhd_streams.crs, geometry='geometry') - - if (len(nwm_headwaters) > 0) or (len(incoming_points) > 0): - - if len(nwm_headwaters) > 0: - print ("Snapping forecasting points to nhd stream network") - streamlines_union = nhd_streams.geometry.unary_union - snapped_geoms = [] - snappedpoints_df = pd.DataFrame(nwm_headwaters).drop(columns=['geometry']) - - # snap lines to streams - for i in range(len(nwm_headwaters)): - snapped_geoms.append(streamlines_union.interpolate(streamlines_union.project(nwm_headwaters.geometry[i]))) - - snappedpoints_df['geometry'] = snapped_geoms - snapped_points = gpd.GeoDataFrame(snappedpoints_df,crs=nhd_streams.crs) - - if (len(incoming_points) > 0) and (len(nwm_headwaters) > 0): - nwm_headwaters = snapped_points.append(incoming_points).reset_index(drop=True) - elif len(incoming_points) > 0: - nwm_headwaters = incoming_points.copy() - else: - print ("No AHPs point(s) within HUC " + str(hucCode) + " boundaries.") - sys.exit(0) - - for index, row in tqdm(nwm_headwaters.iterrows(),total=len(nwm_headwaters)): - distances = nhd_streams.distance(row['geometry']) - # nearestGeom = nhd_streams_tree.nearest(row['geometry']) - min_index = np.argmin(distances) - nhd_streams.loc[min_index,'is_nwm_headwater'] = True - - nhd_streams = nhd_streams.loc[nhd_streams.geometry!=None,:] # special case: remove segments without geometries - - # writeout nwm headwaters - if not nwm_headwaters.empty: - nwm_headwaters.to_file(subset_nwm_headwaters_fileName,driver=getDriver(subset_nwm_headwaters_fileName),index=False) - del nwm_headwaters - - # copy over headwater features to nwm streams - nhd_streams['is_nwm_stream'] = nhd_streams['is_nwm_headwater'].copy() - - # trace down from NWM Headwaters - print('Identify NHD streams downstream of relevant NHD Headwater streams',flush=True) - nhd_streams.set_index('NHDPlusID',inplace=True,drop=False) - - Q = deque(nhd_streams.loc[nhd_streams['is_nwm_headwater'],'NHDPlusID'].tolist()) - visited = set() - - while Q: - q = Q.popleft() - if q in visited: - continue - visited.add(q) - toNode,DnLevelPat = nhd_streams.loc[q,['ToNode','DnLevelPat']] - try: - downstream_ids = nhd_streams.loc[nhd_streams['FromNode'] == 
toNode,:].index.tolist() - except ValueError: # 18050002 has duplicate nhd stream feature - if len(toNode.unique()) == 1: - toNode = toNode.iloc[0] - downstream_ids = nhd_streams.loc[nhd_streams['FromNode'] == toNode,:].index.tolist() - #If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. exclude segments that are diversions) - if len(set(downstream_ids))>1: # special case: remove duplicate NHDPlusIDs - relevant_ids = [segment for segment in downstream_ids if DnLevelPat == nhd_streams.loc[segment,'LevelPathI']] - else: - relevant_ids = downstream_ids - nhd_streams.loc[relevant_ids,'is_nwm_stream'] = True - for i in relevant_ids: - if i not in visited: - Q.append(i) - - nhd_streams = nhd_streams.loc[nhd_streams['is_nwm_stream'],:] - - if dissolveLinks: - # remove multi-line strings - print("Dissolving NHD reaches to Links (reaches constrained to stream intersections)",flush=True) - - nhd_streams.set_index('NHDPlusID',inplace=True,drop=False) - nhd_streams['before_confluence'] = nhd_streams.duplicated(subset='ToNode',keep=False) - - nhd_streams.loc[nhd_streams['is_nwm_headwater'],'linkNo'] = np.arange(1,nhd_streams['is_nwm_headwater'].sum()+1) - - Q = deque(nhd_streams.loc[nhd_streams['is_nwm_headwater'],'NHDPlusID'].tolist()) - visited = set() - linkNo = np.max(nhd_streams.loc[nhd_streams['is_nwm_headwater'],'linkNo']) + 1 - link_geometries = dict() - - # adds all headwaters to link_geometries - for q in Q: - link_geometries[nhd_streams.loc[q,'linkNo']] = [p for p in zip(*nhd_streams.loc[q,'geometry'].coords.xy)][::-1] - - # Do BFS - while Q: - q = Q.popleft() - - if q in visited: - continue - - visited.add(q) - - toNode = nhd_streams.loc[q,'ToNode'] - - downstream_ids = nhd_streams.loc[nhd_streams['FromNode'] == toNode,:].index.tolist() - numberOfDownstreamIDs = len(downstream_ids) - - for i in downstream_ids: - if i not in visited: - Q.append(i) - - if nhd_streams.loc[q,'before_confluence'] or (numberOfDownstreamIDs > 1): - # do not dissolve - linkNo += 1 - nhd_streams.loc[i,'linkNo'] = linkNo - - next_stream_geometry = [p for p in zip(*nhd_streams.loc[i,'geometry'].coords.xy)][::-1] - - link_geometries[linkNo] = next_stream_geometry - - else: - nhd_streams.loc[i,'linkNo'] = nhd_streams.loc[q,'linkNo'] - - next_stream_geometry = [p for p in zip(*nhd_streams.loc[i,'geometry'].coords.xy)][::-1] - - link_geometries[nhd_streams.loc[i,'linkNo']] = link_geometries[nhd_streams.loc[i,'linkNo']] + next_stream_geometry - - - # convert dictionary to lists for keys (linkNos) and values (geometry linestrings) - output_links = [] ; output_geometries = [] - for ln_no, ln_geom in link_geometries.items(): - output_links = output_links + [ln_no] - output_geometries = output_geometries + [LineString(ln_geom)] - - nhd_streams = gpd.GeoDataFrame({'linkNO' : output_links,'geometry': output_geometries},geometry='geometry',crs=projection) - - # write to files - nhd_streams.reset_index(drop=True,inplace=True) - nhd_streams.to_file(subset_nhd_streams_fileName,driver=getDriver(subset_nhd_streams_fileName),index=False) - - if subset_nhd_headwaters_fileName is not None: - # identify all nhd headwaters - print('Identify NHD headwater points',flush=True) - nhd_headwater_streams = nhd_streams.loc[nhd_streams['is_nwm_headwater'],:] - nhd_headwater_streams = nhd_headwater_streams.explode() - - hw_points = np.zeros(len(nhd_headwater_streams),dtype=object) - for index,lineString in enumerate(nhd_headwater_streams.geometry): - hw_point = [point for point in 
zip(*lineString.coords.xy)][-1] - hw_points[index] = Point(*hw_point) - - nhd_headwater_points = gpd.GeoDataFrame({'NHDPlusID' : nhd_headwater_streams['NHDPlusID'], - 'geometry' : hw_points},geometry='geometry',crs=projection) - - nhd_headwater_points.to_file(subset_nhd_headwaters_fileName,driver=getDriver(subset_nhd_headwaters_fileName),index=False) - del nhd_headwater_streams, nhd_headwater_points - -def getDriver(fileName): - - driverDictionary = {'.gpkg' : 'GPKG','.geojson' : 'GeoJSON','.shp' : 'ESRI Shapefile'} - driver = driverDictionary[splitext(fileName)[1]] - - return(driver) - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Subset vector layers') - parser.add_argument('-d','--hucCode', help='HUC boundary ID', required=True,type=str) - parser.add_argument('-w','--nwm-streams', help='NWM flowlines', required=True) - parser.add_argument('-f','--nwm-headwaters', help='NWM headwater points', required=True) - parser.add_argument('-s','--nhd-streams',help='NHDPlus HR burnline',required=True) - parser.add_argument('-l','--nwm-lakes', help='NWM Lakes', required=True) - parser.add_argument('-r','--nld-lines', help='Levee vectors to use within project path', required=True) - parser.add_argument('-m','--nwm-catchments', help='NWM catchments', required=True) - parser.add_argument('-u','--wbd',help='HUC boundary',required=True) - parser.add_argument('-g','--wbd-buffer',help='Buffered HUC boundary',required=True) - parser.add_argument('-y','--ahps-sites',help='Buffered HUC boundary',required=True) - parser.add_argument('-v','--landsea',help='LandSea - land boundary',required=True) - parser.add_argument('-c','--subset-nhd-streams',help='NHD streams subset',required=True) - parser.add_argument('-a','--subset-lakes',help='NWM lake subset',required=True) - parser.add_argument('-t','--subset-nwm-headwaters',help='NWM headwaters subset',required=True) - parser.add_argument('-z','--subset-nld-lines',help='Subset of NLD levee vectors for HUC',required=True) - parser.add_argument('-e','--subset-nhd-headwaters',help='NHD headwaters subset',required=True,default=None) - parser.add_argument('-n','--subset-catchments',help='NWM catchments subset',required=True) - parser.add_argument('-b','--subset-nwm-streams',help='NWM streams subset',required=True) - parser.add_argument('-x','--subset-landsea',help='LandSea subset',required=True) - parser.add_argument('-o','--dissolve-links',help='remove multi-line strings',action="store_true",default=False) - parser.add_argument('-p','--extent',help='MS or FR extent',required=True) - - args = vars(parser.parse_args()) - - hucCode = args['hucCode'] - nwm_streams_fileName = args['nwm_streams'] - nwm_headwaters_fileName = args['nwm_headwaters'] - nhd_streams_fileName = args['nhd_streams'] - nwm_lakes_fileName = args['nwm_lakes'] - nld_lines_fileName = args['nld_lines'] - nwm_catchments_fileName = args['nwm_catchments'] - wbd_fileName = args['wbd'] - wbd_buffer_fileName = args['wbd_buffer'] - ahps_sites_fileName = args['ahps_sites'] - landsea_fileName = args['landsea'] - subset_nhd_streams_fileName = args['subset_nhd_streams'] - subset_nwm_lakes_fileName = args['subset_lakes'] - subset_nwm_headwaters_fileName = args['subset_nwm_headwaters'] - subset_nld_lines_fileName = args['subset_nld_lines'] - subset_nwm_catchments_fileName = args['subset_catchments'] - subset_nhd_headwaters_fileName = args['subset_nhd_headwaters'] - subset_nwm_streams_fileName = args['subset_nwm_streams'] - subset_landsea_filename = args['subset_landsea'] - dissolveLinks = 
args['dissolve_links'] - extent = args['extent'] - - subset_vector_layers(hucCode,nwm_streams_fileName,nwm_headwaters_fileName,nhd_streams_fileName,nwm_lakes_fileName,nld_lines_fileName,nwm_catchments_fileName,wbd_fileName,wbd_buffer_fileName,ahps_sites_fileName,landsea_fileName,subset_nhd_streams_fileName,subset_nwm_lakes_fileName,subset_nld_lines_fileName,subset_nwm_headwaters_fileName,subset_nwm_catchments_fileName,subset_nwm_streams_fileName,subset_landsea_filename,subset_nhd_headwaters_fileName,dissolveLinks,extent) diff --git a/lib/split_flows.py b/lib/split_flows.py deleted file mode 100755 index 0687ea0cd..000000000 --- a/lib/split_flows.py +++ /dev/null @@ -1,207 +0,0 @@ -#!/usr/bin/env python3 - -''' -Description: - 1) split stream segments based on lake boundaries and input threshold distance - 2) calculate channel slope, manning's n, and LengthKm for each segment - 3) create unique ids using HUC8 boundaries (and unique 'fossid' column) - 4) create network traversal attribute columns (To_Node, From_Node, NextDownID) - 5) create points layer with segment verticies encoded with HydroID's (used for catchment delineation in next step) -''' - -import sys -import geopandas as gpd -import pandas as pd -from shapely.geometry import Point, LineString, MultiPoint -import rasterio -import numpy as np -import argparse -from tqdm import tqdm -import time -from os.path import isfile -from os import remove -from collections import OrderedDict -import buildstreamtraversal - -flows_fileName = sys.argv[1] -dem_fileName = sys.argv[2] -split_flows_fileName = sys.argv[3] -split_points_fileName = sys.argv[4] -maxLength = float(sys.argv[5]) -slope_min = float(sys.argv[6]) -huc8_filename = sys.argv[7] -lakes_filename = sys.argv[8] -lakes_buffer_input = float(sys.argv[9]) - -toMetersConversion = 1e-3 - -print('Loading data ...') -flows = gpd.read_file(flows_fileName) - -if not len(flows) > 0: - print ("No relevant streams within HUC boundaries.") - sys.exit(0) - -WBD8 = gpd.read_file(huc8_filename) -#dem = Raster(dem_fileName) -dem = rasterio.open(dem_fileName,'r') -if isfile(lakes_filename): - lakes = gpd.read_file(lakes_filename) -else: - lakes = None - -WBD8 = WBD8.filter(items=['fossid', 'geometry']) -WBD8 = WBD8.set_index('fossid') -flows = flows.explode() - -# temp -flows = flows.to_crs(WBD8.crs) - -split_flows = [] -slopes = [] -HYDROID = 'HydroID' -split_endpoints = OrderedDict() -# check for lake features -if lakes is not None: - if len(lakes) > 0: - print ('splitting stream segments at ' + str(len(lakes)) + ' waterbodies') - #create splits at lake boundaries - lakes = lakes.filter(items=['newID', 'geometry']) - lakes = lakes.set_index('newID') - flows = gpd.overlay(flows, lakes, how='union').explode().reset_index(drop=True) - lakes_buffer = lakes.copy() - lakes_buffer['geometry'] = lakes.buffer(lakes_buffer_input) # adding X meter buffer for spatial join comparison (currently using 20meters) - -print ('splitting ' + str(len(flows)) + ' stream segments based on ' + str(maxLength) + ' m max length') - -# remove empty geometries -flows = flows.loc[~flows.is_empty,:] - -for i,lineString in tqdm(enumerate(flows.geometry),total=len(flows.geometry)): - # Reverse geometry order (necessary for BurnLines) - lineString = LineString(lineString.coords[::-1]) - - # skip lines of zero length - if lineString.length == 0: - continue - - # existing reaches of less than maxLength - if lineString.length < maxLength: - split_flows = split_flows + [lineString] - line_points = [point for point in 
zip(*lineString.coords.xy)] - - # Calculate channel slope - start_point = line_points[0]; end_point = line_points[-1] - start_elev,end_elev = [i[0] for i in rasterio.sample.sample_gen(dem,[start_point,end_point])] - slope = float(abs(start_elev - end_elev) / lineString.length) - if slope < slope_min: - slope = slope_min - slopes = slopes + [slope] - continue - - splitLength = lineString.length / np.ceil(lineString.length / maxLength) - - cumulative_line = [] - line_points = [] - last_point = [] - - last_point_in_entire_lineString = list(zip(*lineString.coords.xy))[-1] - - for point in zip(*lineString.coords.xy): - - cumulative_line = cumulative_line + [point] - line_points = line_points + [point] - numberOfPoints_in_cumulative_line = len(cumulative_line) - - if last_point: - cumulative_line = [last_point] + cumulative_line - numberOfPoints_in_cumulative_line = len(cumulative_line) - elif numberOfPoints_in_cumulative_line == 1: - continue - - cumulative_length = LineString(cumulative_line).length - - if cumulative_length >= splitLength: - - splitLineString = LineString(cumulative_line) - split_flows = split_flows + [splitLineString] - - # Calculate channel slope - start_point = cumulative_line[0]; end_point = cumulative_line[-1] - start_elev,end_elev = [i[0] for i in rasterio.sample.sample_gen(dem,[start_point,end_point])] - slope = float(abs(start_elev - end_elev) / splitLineString.length) - if slope < slope_min: - slope = slope_min - slopes = slopes + [slope] - - last_point = end_point - - if (last_point == last_point_in_entire_lineString): - continue - - cumulative_line = [] - line_points = [] - - splitLineString = LineString(cumulative_line) - split_flows = split_flows + [splitLineString] - - # Calculate channel slope - start_point = cumulative_line[0]; end_point = cumulative_line[-1] - start_elev,end_elev = [i[0] for i in rasterio.sample.sample_gen(dem,[start_point,end_point])] - slope = float(abs(start_elev - end_elev) / splitLineString.length) - if slope < slope_min: - slope = slope_min - slopes = slopes + [slope] - -split_flows_gdf = gpd.GeoDataFrame({'S0' : slopes ,'geometry':split_flows}, crs=flows.crs, geometry='geometry') -split_flows_gdf['LengthKm'] = split_flows_gdf.geometry.length * toMetersConversion -if lakes is not None: - split_flows_gdf = gpd.sjoin(split_flows_gdf, lakes_buffer, how='left', op='within') #options: intersects, within, contains, crosses - split_flows_gdf = split_flows_gdf.rename(columns={"index_right": "LakeID"}).fillna(-999) -else: - split_flows_gdf['LakeID'] = -999 - -# Create Ids and Network Traversal Columns -addattributes = buildstreamtraversal.BuildStreamTraversalColumns() -tResults=None -tResults = addattributes.execute(split_flows_gdf, WBD8, HYDROID) -if tResults[0] == 'OK': - split_flows_gdf = tResults[1] -else: - print ('Error: Could not add network attributes to stream segments') - -# Get Outlet Point Only -#outlet = OrderedDict() -#for i,segment in split_flows_gdf.iterrows(): -# outlet[segment.geometry.coords[-1]] = segment[HYDROID] - -#hydroIDs_points = [hidp for hidp in outlet.values()] -#split_points = [Point(*point) for point in outlet] - -# Get all vertices -split_points = OrderedDict() -for row in split_flows_gdf[['geometry',HYDROID, 'NextDownID']].iterrows(): - lineString = row[1][0] - - for point in zip(*lineString.coords.xy): - if point in split_points: - if row[1][2] == split_points[point]: - pass - else: - split_points[point] = row[1][1] - else: - split_points[point] = row[1][1] - -hydroIDs_points = [hidp for hidp in 
split_points.values()] -split_points = [Point(*point) for point in split_points] - -split_points_gdf = gpd.GeoDataFrame({'id': hydroIDs_points , 'geometry':split_points}, crs=flows.crs, geometry='geometry') -print('Writing outputs ...') - -if isfile(split_flows_fileName): - remove(split_flows_fileName) -split_flows_gdf.to_file(split_flows_fileName,driver='GPKG',index=False) - -if isfile(split_points_fileName): - remove(split_points_fileName) -split_points_gdf.to_file(split_points_fileName,driver='GPKG',index=False) diff --git a/lib/time_and_tee_run_by_unit.sh b/lib/time_and_tee_run_by_unit.sh deleted file mode 100755 index c1eca97fc..000000000 --- a/lib/time_and_tee_run_by_unit.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -e - -/usr/bin/time -v $libDir/run_by_unit.sh $1 |& tee $outputRunDataDir/logs/$1.log -exit ${PIPESTATUS[0]} - diff --git a/lib/utils/shared_functions.py b/lib/utils/shared_functions.py deleted file mode 100644 index fa643d7fe..000000000 --- a/lib/utils/shared_functions.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python3 - -import os - - -def pull_file(url, full_pulled_filepath): - """ - This helper function pulls a file and saves it to a specified path. - - Args: - url (str): The full URL to the file to download. - full_pulled_filepath (str): The full system path where the downloaded file will be saved. - """ - import urllib.request - - print("Pulling " + url) - urllib.request.urlretrieve(url, full_pulled_filepath) - - -def delete_file(file_path): - """ - This helper function deletes a file. - - Args: - file_path (str): System path to a file to be deleted. - """ - - try: - os.remove(file_path) - except FileNotFoundError: - pass - - -def run_system_command(args): - """ - This helper function takes a system command and runs it. This function is designed for use - in multiprocessing. - - Args: - args (list): A single-item list, the first and only item being a system command string. - """ - - # Parse system command. - command = args[0] - - # Run system command. - os.system(command) - - -def subset_wbd_gpkg(wbd_gpkg, multilayer_wbd_geopackage): - - import geopandas as gp - from utils.shared_variables import CONUS_STATE_LIST, PREP_PROJECTION - - print("Subsetting " + wbd_gpkg + "...") - # Read geopackage into dataframe. - wbd = gp.read_file(wbd_gpkg) - gdf = gp.GeoDataFrame(wbd) - - for index, row in gdf.iterrows(): - state = row["STATES"] - if state != None: # Some polygons are empty in the STATES field. - keep_flag = False # Default to Fault, i.e. to delete the polygon. - if state in CONUS_STATE_LIST: - keep_flag = True - # Only split if multiple states present. More efficient this way. - elif len(state) > 2: - for wbd_state in state.split(","): # Some polygons have multiple states, separated by a comma. - if wbd_state in CONUS_STATE_LIST: # Check each polygon to make sure it's state abbrev name is allowed. - keep_flag = True - break - if not keep_flag: - gdf.drop(index, inplace=True) # Delete from dataframe. - - # Overwrite geopackage. - layer_name = os.path.split(wbd_gpkg)[1].strip('.gpkg') - gdf.crs = PREP_PROJECTION - gdf.to_file(multilayer_wbd_geopackage, layer=layer_name, driver='GPKG') - - - - \ No newline at end of file diff --git a/lib/utils/shared_variables.py b/lib/utils/shared_variables.py deleted file mode 100644 index cf75c733c..000000000 --- a/lib/utils/shared_variables.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 - -# Projections. 
-#PREP_PROJECTION = "+proj=aea +datum=NAD83 +x_0=0.0 +y_0=0.0 +lon_0=96dW +lat_0=23dN +lat_1=29d30'N +lat_2=45d30'N +towgs84=-0.9956000824677655,1.901299877314078,0.5215002840524426,0.02591500053005733,0.009425998542707753,0.01159900118427752,-0.00062000005129903 +no_defs +units=m" -PREP_PROJECTION = 'PROJCS["USA_Contiguous_Albers_Equal_Area_Conic_USGS_version",GEOGCS["NAD83",DATUM["North_American_Datum_1983",SPHEROID["GRS 1980",6378137,298.2572221010042,AUTHORITY["EPSG","7019"]],AUTHORITY["EPSG","6269"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4269"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",29.5],PARAMETER["standard_parallel_2",45.5],PARAMETER["latitude_of_center",23],PARAMETER["longitude_of_center",-96],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]]]' - -# -- Data URLs-- # -NHD_URL_PARENT = r'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/' -NWM_HYDROFABRIC_URL = r'http://www.nohrsc.noaa.gov/pub/staff/keicher/NWM_live/web/data_tools/NWM_channel_hydrofabric.tar.gz' # Temporary -WBD_NATIONAL_URL = r'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/National/GDB/WBD_National_GDB.zip' -WBD_HU2_URL_PARENT = r'http://prd-tnm.s3-website-us-west-2.amazonaws.com/?prefix=StagedProducts/Hydrography/WBD/HU2/GDB' - -# -- Prefixes and Suffixes -- # -NHD_URL_PREFIX = 'NHDPLUS_H_' -NHD_RASTER_URL_SUFFIX = '_HU4_RASTER.7z' -NHD_VECTOR_URL_SUFFIX = '_HU4_GDB.zip' -NHD_RASTER_EXTRACTION_PREFIX = 'HRNHDPlusRasters' -NHD_RASTER_EXTRACTION_SUFFIX = 'elev_cm.tif' - -NHD_VECTOR_EXTRACTION_PREFIX = 'NHDPLUS_H_' -NHD_VECTOR_EXTRACTION_SUFFIX = '_HU4_GDB.zip' - -# -- Field Names -- # -FOSS_ID = 'fossid' - -# -- Other -- # -CONUS_STATE_LIST = {"AL", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", - "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", - "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", - "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "PR", "RI", "SC", - "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"} - -OVERWRITE_WBD = 'OVERWRITE_WBD' -OVERWRITE_NHD = 'OVERWRITE_NHD' -OVERWRITE_ALL = 'OVERWRITE_ALL' diff --git a/lib/utils/__init__.py b/src/__init__.py similarity index 100% rename from lib/utils/__init__.py rename to src/__init__.py diff --git a/lib/acquire_and_preprocess_inputs.py b/src/acquire_and_preprocess_inputs.py similarity index 88% rename from lib/acquire_and_preprocess_inputs.py rename to src/acquire_and_preprocess_inputs.py index f3b200e94..417cc4d93 100755 --- a/lib/acquire_and_preprocess_inputs.py +++ b/src/acquire_and_preprocess_inputs.py @@ -4,10 +4,12 @@ import argparse import csv import sys +sys.path.append('/foss_fim/src') import shutil from multiprocessing import Pool -import geopandas as gp +import geopandas as gpd from urllib.error import HTTPError +from tqdm import tqdm from utils.shared_variables import (NHD_URL_PARENT, NHD_URL_PREFIX, @@ -17,24 +19,27 @@ NHD_VECTOR_EXTRACTION_SUFFIX, PREP_PROJECTION, WBD_NATIONAL_URL, - FOSS_ID, - OVERWRITE_WBD, - OVERWRITE_NHD, - OVERWRITE_ALL) + FIM_ID + ) -from utils.shared_functions import pull_file, run_system_command, subset_wbd_gpkg, delete_file +from utils.shared_functions import (pull_file, + run_system_command, + delete_file, + getDriver) NHDPLUS_VECTORS_DIRNAME = 'nhdplus_vectors' NHDPLUS_RASTERS_DIRNAME = 'nhdplus_rasters' NWM_HYDROFABRIC_DIRNAME = 'nwm_hydrofabric' NWM_FILE_TO_SUBSET_WITH = 'nwm_flows.gpkg' + def 
subset_wbd_to_nwm_domain(wbd,nwm_file_to_use): - intersecting_indices = [not (gp.read_file(nwm_file_to_use,mask=b).empty) for b in wbd.geometry] + intersecting_indices = [not (gpd.read_file(nwm_file_to_use,mask=b).empty) for b in wbd.geometry] return(wbd[intersecting_indices]) + def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_use,overwrite_wbd,num_workers): """ This helper function pulls and unzips Watershed Boundary Dataset (WBD) data. It uses the WBD URL defined by WBD_NATIONAL_URL. @@ -70,21 +75,20 @@ def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_ procs_list, wbd_gpkg_list = [], [] multilayer_wbd_geopackage = os.path.join(wbd_directory, 'WBD_National.gpkg') - # Add fossid to HU8, project, and convert to geopackage. Code block from Brian Avant. + # Add fimid to HU8, project, and convert to geopackage. if os.path.isfile(multilayer_wbd_geopackage): os.remove(multilayer_wbd_geopackage) print("Making National WBD GPKG...") print("\tWBDHU8") - wbd_hu8 = gp.read_file(wbd_gdb_path, layer='WBDHU8') + wbd_hu8 = gpd.read_file(wbd_gdb_path, layer='WBDHU8') wbd_hu8 = wbd_hu8.rename(columns={'huc8':'HUC8'}) # rename column to caps wbd_hu8 = wbd_hu8.sort_values('HUC8') - fossids = [str(item).zfill(4) for item in list(range(1, 1 + len(wbd_hu8)))] - wbd_hu8[FOSS_ID] = fossids + fimids = [str(item).zfill(4) for item in list(range(1000, 1000 + len(wbd_hu8)))] + wbd_hu8[FIM_ID] = fimids wbd_hu8 = wbd_hu8.to_crs(PREP_PROJECTION) # Project. - #wbd_hu8.to_file(os.path.join(wbd_directory, 'WBDHU8.gpkg'), driver='GPKG') # Save. wbd_hu8 = subset_wbd_to_nwm_domain(wbd_hu8,nwm_file_to_use) wbd_hu8.geometry = wbd_hu8.buffer(0) - wbd_hu8.to_file(multilayer_wbd_geopackage, driver='GPKG',layer='WBDHU8') # Save. + wbd_hu8.to_file(multilayer_wbd_geopackage,layer='WBDHU8',driver=getDriver(multilayer_wbd_geopackage),index=False) # Save. wbd_hu8.HUC8.to_csv(nwm_huc_list_file_template.format('8'),index=False,header=False) #wbd_gpkg_list.append(os.path.join(wbd_directory, 'WBDHU8.gpkg')) # Append to wbd_gpkg_list for subsetting later. del wbd_hu8 @@ -93,19 +97,19 @@ def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_ for wbd_layer_num in ['4', '6']: wbd_layer = 'WBDHU' + wbd_layer_num print("\t{}".format(wbd_layer)) - wbd = gp.read_file(wbd_gdb_path,layer=wbd_layer) + wbd = gpd.read_file(wbd_gdb_path,layer=wbd_layer) wbd = wbd.to_crs(PREP_PROJECTION) wbd = wbd.rename(columns={'huc'+wbd_layer_num : 'HUC' + wbd_layer_num}) wbd = subset_wbd_to_nwm_domain(wbd,nwm_file_to_use) wbd.geometry = wbd.buffer(0) - wbd.to_file(multilayer_wbd_geopackage,driver="GPKG",layer=wbd_layer) + wbd.to_file(multilayer_wbd_geopackage,layer=wbd_layer,driver=getDriver(multilayer_wbd_geopackage),index=False) wbd['HUC{}'.format(wbd_layer_num)].to_csv(nwm_huc_list_file_template.format(wbd_layer_num),index=False,header=False) #output_gpkg = os.path.join(wbd_directory, wbd_layer + '.gpkg') #wbd_gpkg_list.append(output_gpkg) #procs_list.append(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {wbd_gdb_path} {wbd_layer}'.format(output_gpkg=output_gpkg, wbd_gdb_path=wbd_gdb_path, wbd_layer=wbd_layer, projection=PREP_PROJECTION)]) - #pool = Pool(num_workers) - #pool.map(run_system_command, procs_list) + # with Pool(processes=num_workers) as pool: + # pool.map(run_system_command, procs_list) # Subset WBD layers to CONUS and add to single geopackage. 
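# Editor's note: getDriver() (imported from utils.shared_functions) is used throughout this
# diff, e.g. wbd_hu8.to_file(..., driver=getDriver(multilayer_wbd_geopackage)), but its body
# is not shown here. A minimal sketch of what such a helper is assumed to do -- map a file
# extension to the OGR driver name that GeoDataFrame.to_file() expects:
import os

def getDriver(fileName):
    # Hypothetical mapping; the repo's actual helper may support more formats.
    driverDictionary = {'.gpkg': 'GPKG', '.geojson': 'GeoJSON', '.shp': 'ESRI Shapefile'}
    return driverDictionary[os.path.splitext(fileName)[1]]

# Example: getDriver('WBD_National.gpkg') -> 'GPKG', matching the hard-coded
# driver='GPKG' calls that this diff replaces.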
#print("Subsetting WBD layers to CONUS...") @@ -122,6 +126,7 @@ def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_ return(wbd_directory) + def pull_and_prepare_nwm_hydrofabric(path_to_saved_data_parent_dir, path_to_preinputs_dir,num_workers): """ This helper function pulls and unzips NWM hydrofabric data. It uses the NWM hydrofabric URL defined by NWM_HYDROFABRIC_URL. @@ -147,9 +152,8 @@ def pull_and_prepare_nwm_hydrofabric(path_to_saved_data_parent_dir, path_to_prei output_gpkg = os.path.join(nwm_hydrofabric_directory, nwm_layer + '_proj.gpkg') procs_list.append(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {nwm_hydrofabric_gdb} {nwm_layer}'.format(projection=PREP_PROJECTION, output_gpkg=output_gpkg, nwm_hydrofabric_gdb=nwm_hydrofabric_gdb, nwm_layer=nwm_layer)]) - pool = Pool(num_workers) - pool.map(run_system_command, procs_list) - pool.close() + with Pool(processes=num_workers) as pool: + pool.map(run_system_command, procs_list) def pull_and_prepare_nhd_data(args): @@ -180,9 +184,7 @@ def pull_and_prepare_nhd_data(args): if not os.path.exists(elev_cm_tif) or overwrite_nhd: pull_file(nhd_raster_download_url, nhd_raster_extraction_path) os.system("7za e {nhd_raster_extraction_path} -o{nhd_raster_parent_dir} elev_cm.tif -r ".format(nhd_raster_extraction_path=nhd_raster_extraction_path, nhd_raster_parent_dir=nhd_raster_parent_dir)) - # Change projection for elev_cm.tif. - #print("Projecting elev_cm...") - #run_system_command(['gdal_edit.py -a_srs "{projection}" {elev_cm_tif}'.format(projection=PREP_PROJECTION, elev_cm_tif=elev_cm_tif)]) + file_list = os.listdir(nhd_raster_parent_dir) for f in file_list: full_path = os.path.join(nhd_raster_parent_dir, f) @@ -204,15 +206,15 @@ def pull_and_prepare_nhd_data(args): huc = os.path.split(nhd_vector_extraction_parent)[1] # Parse HUC. os.system("7za x {nhd_vector_extraction_path} -o{nhd_vector_extraction_parent}".format(nhd_vector_extraction_path=nhd_vector_extraction_path, nhd_vector_extraction_parent=nhd_vector_extraction_parent)) # extract input stream network - nhd = gp.read_file(nhd_gdb,layer='NHDPlusBurnLineEvent') + nhd = gpd.read_file(nhd_gdb,layer='NHDPlusBurnLineEvent') nhd = nhd.to_crs(PREP_PROJECTION) nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDPlusBurnLineEvent' + huc + '.gpkg'),driver='GPKG') # extract flowlines for FType attributes - nhd = gp.read_file(nhd_gdb,layer='NHDFlowline') + nhd = gpd.read_file(nhd_gdb,layer='NHDFlowline') nhd = nhd.to_crs(PREP_PROJECTION) nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDFlowline' + huc + '.gpkg'),driver='GPKG') # extract attributes - nhd = gp.read_file(nhd_gdb,layer='NHDPlusFlowLineVAA') + nhd = gpd.read_file(nhd_gdb,layer='NHDPlusFlowLineVAA') nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDPlusFlowLineVAA' + huc + '.gpkg'),driver='GPKG') # -- Project and convert NHDPlusBurnLineEvent and NHDPlusFlowLineVAA vectors to geopackage -- # #for nhd_layer in ['NHDPlusBurnLineEvent', 'NHDPlusFlowlineVAA']: @@ -245,15 +247,15 @@ def build_huc_list_files(path_to_saved_data_parent_dir, wbd_directory): huc_gpkg = 'WBDHU8' # The WBDHU4 are handled by the nhd_plus_raster_dir name. # Open geopackage. - wbd = gp.read_file(full_huc_gpkg, layer=huc_gpkg) + wbd = gpd.read_file(full_huc_gpkg, layer=huc_gpkg) # Loop through entries and compare against the huc4_list to get available HUCs within the geopackage domain. 
- for index, row in tqdm(wbd.iterrows()): + for index, row in tqdm(wbd.iterrows(),total=len(wbd)): huc = row["HUC" + huc_gpkg[-1]] huc_mask = wbd.loc[wbd[str("HUC" + huc_gpkg[-1])]==huc].geometry burnline = os.path.join(nhd_plus_vector_dir, huc[0:4], 'NHDPlusBurnLineEvent' + huc[0:4] + '.gpkg') if os.path.exists(burnline): - nhd_test = len(gp.read_file(burnline, mask = huc_mask)) # this is slow, iterates through 2000+ HUC8s + nhd_test = len(gpd.read_file(burnline, mask = huc_mask)) # this is slow, iterates through 2000+ HUC8s # Append huc to huc8 list. if (str(huc[:4]) in huc4_list) & (nhd_test>0): huc8_list.append(huc) @@ -262,9 +264,12 @@ def build_huc_list_files(path_to_saved_data_parent_dir, wbd_directory): huc6_list = set(huc6_list) # Write huc lists to appropriate .lst files. - included_huc4_file = os.path.join(path_to_saved_data_parent_dir, 'included_huc4.lst') - included_huc6_file = os.path.join(path_to_saved_data_parent_dir, 'included_huc6.lst') - included_huc8_file = os.path.join(path_to_saved_data_parent_dir, 'included_huc8.lst') + huc_lists_dir = os.path.join(path_to_saved_data_parent_dir, 'huc_lists') + if not os.path.exists(huc_lists_dir): + os.mkdir(huc_lists_dir) + included_huc4_file = os.path.join(huc_lists_dir, 'included_huc4.lst') + included_huc6_file = os.path.join(huc_lists_dir, 'included_huc6.lst') + included_huc8_file = os.path.join(huc_lists_dir, 'included_huc8.lst') # Overly verbose file writing loops. Doing this in a pinch. with open(included_huc4_file, 'w') as f: @@ -345,8 +350,9 @@ def manage_preprocessing(hucs_of_interest, num_workers=1,overwrite_nhd=False, ov nhd_procs_list.append([nhd_raster_download_url, nhd_raster_extraction_path, nhd_vector_download_url, nhd_vector_extraction_path, overwrite_nhd]) # Pull and prepare NHD data. 
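# Editor's note: this diff consistently replaces bare `pool = Pool(n); pool.map(...)` usage
# with the context-manager form so the worker pool is shut down automatically (Pool.__exit__
# calls terminate()) even if a task raises. A minimal, generic sketch of the pattern;
# _demo_worker and procs_list are placeholders for callables such as
# pull_and_prepare_nhd_data or run_system_command and their argument lists:
from multiprocessing import Pool

def _demo_worker(args):
    # Stand-in for a real worker function.
    return args[0] * 2

if __name__ == '__main__':
    procs_list = [[1], [2], [3]]
    with Pool(processes=2) as pool:
        results = pool.map(_demo_worker, procs_list)   # [2, 4, 6]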
- #pool = Pool(num_workers) - #pool.map(pull_and_prepare_nhd_data, nhd_procs_list) + # with Pool(processes=num_workers) as pool: + # pool.map(pull_and_prepare_nhd_data, nhd_procs_list) + for huc in nhd_procs_list: try: pull_and_prepare_nhd_data(huc) diff --git a/lib/add_crosswalk.py b/src/add_crosswalk.py similarity index 56% rename from lib/add_crosswalk.py rename to src/add_crosswalk.py index 6ae1c6980..9b38418b2 100755 --- a/lib/add_crosswalk.py +++ b/src/add_crosswalk.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import os import geopandas as gpd import pandas as pd from numpy import unique @@ -7,18 +8,26 @@ import json import argparse import sys +# sys.path.append('/foss_fim/src') +# sys.path.append('/foss_fim/config') +from utils.shared_functions import getDriver, mem_profile +from utils.shared_variables import FIM_ID -def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,calibration_mode=False): + +@mem_profile +def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode=False): input_catchments = gpd.read_file(input_catchments_fileName) input_flows = gpd.read_file(input_flows_fileName) input_huc = gpd.read_file(input_huc_fileName) input_nwmflows = gpd.read_file(input_nwmflows_fileName) + min_catchment_area = float(os.environ['min_catchment_area']) #0.25# + min_stream_length = float(os.environ['min_stream_length']) #0.5# if extent == 'FR': ## crosswalk using majority catchment method - # calculate majority catchemnts + # calculate majority catchments majority_calc = zonal_stats(input_catchments, input_nwmcatras_fileName, stats=['majority'], geojson_out=True) input_majorities = gpd.GeoDataFrame.from_features(majority_calc) input_majorities = input_majorities.rename(columns={'majority' : 'feature_id'}) @@ -32,19 +41,20 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f relevant_input_nwmflows = input_nwmflows[input_nwmflows['feature_id'].isin(input_majorities['feature_id'])] relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id','order_']) - if calibration_mode == False: - if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) - output_catchments = input_catchments.merge(input_majorities[['HydroID','feature_id']],on='HydroID') - output_catchments = output_catchments.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id') + if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) + output_catchments = input_catchments.merge(input_majorities[['HydroID','feature_id']],on='HydroID') + output_catchments = output_catchments.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id') if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) output_flows = input_flows.merge(input_majorities[['HydroID','feature_id']],on='HydroID') if output_flows.HydroID.dtype != 'int': 
output_flows.HydroID = output_flows.HydroID.astype(int) output_flows = output_flows.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id') + output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') elif extent == 'MS': ## crosswalk using stream segment midpoint method input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc) + input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem==1] input_nwmcat = input_nwmcat.rename(columns={'ID':'feature_id'}) if input_nwmcat.feature_id.dtype != 'int': input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int) input_nwmcat=input_nwmcat.set_index('feature_id') @@ -57,7 +67,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f hydroID = [] for i,lineString in enumerate(input_flows.geometry): hydroID = hydroID + [input_flows.loc[i,'HydroID']] - stream_midpoint = stream_midpoint + [lineString.interpolate(0.05,normalized=True)] + stream_midpoint = stream_midpoint + [lineString.interpolate(0.5,normalized=True)] input_flows_midpoint = gpd.GeoDataFrame({'HydroID':hydroID, 'geometry':stream_midpoint}, crs=input_flows.crs, geometry='geometry') input_flows_midpoint = input_flows_midpoint.set_index('HydroID') @@ -65,6 +75,22 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f # Create crosswalk crosswalk = gpd.sjoin(input_flows_midpoint, input_nwmcat, how='left', op='within').reset_index() crosswalk = crosswalk.rename(columns={"index_right": "feature_id"}) + + # fill in missing ms + crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()] + for index, stream in crosswalk_missing.iterrows(): + + # find closest nwm catchment by distance + distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry] + min_dist = min(distances) + nwmcat_index=distances.index(min_dist) + + # update crosswalk + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'feature_id'] = input_nwmcat.iloc[nwmcat_index].name + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'AreaSqKM'] = input_nwmcat.iloc[nwmcat_index].AreaSqKM + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'Shape_Length'] = input_nwmcat.iloc[nwmcat_index].Shape_Length + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'Shape_Area'] = input_nwmcat.iloc[nwmcat_index].Shape_Area + crosswalk = crosswalk.filter(items=['HydroID', 'feature_id']) crosswalk = crosswalk.merge(input_nwmflows[['feature_id','order_']],on='feature_id') @@ -72,12 +98,12 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f print ("No relevant streams within HUC boundaries.") sys.exit(0) - if calibration_mode == False: - if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) - output_catchments = input_catchments.merge(crosswalk,on='HydroID') + if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) + output_catchments = input_catchments.merge(crosswalk,on='HydroID') if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) output_flows = input_flows.merge(crosswalk,on='HydroID') + output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') # read in manning's n values if calibration_mode == False: @@ -91,11 +117,67 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_flows['ManningN'] = output_flows['order_'].astype(str).map(mannings_dict) + if 
output_flows.NextDownID.dtype != 'int': output_flows.NextDownID = output_flows.NextDownID.astype(int) + + # Adjust short model reach rating curves + print("Adjusting model reach rating curves") + sml_segs = pd.DataFrame() + + # replace small segment geometry with neighboring stream + for stream_index in output_flows.index: + if output_flows["areasqkm"][stream_index] < min_catchment_area and output_flows["LengthKm"][stream_index] < min_stream_length and output_flows["LakeID"][stream_index] < 0: + + short_id = output_flows['HydroID'][stream_index] + to_node = output_flows['To_Node'][stream_index] + from_node = output_flows['From_Node'][stream_index] + + # multiple upstream segments + if len(output_flows.loc[output_flows['NextDownID'] == short_id]['HydroID']) > 1: + try: + max_order = max(output_flows.loc[output_flows['NextDownID'] == short_id]['order_']) # drainage area would be better than stream order but we would need to calculate + except: + print(f"short_id {short_id} cannot calculate max stream order for multiple upstream segments scenario") + + if len(output_flows.loc[(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)]['HydroID']) == 1: + update_id = output_flows.loc[(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)]['HydroID'].item() + + else: + update_id = output_flows.loc[(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)]['HydroID'].values[0] # get the first one (same stream order, without drainage area info it is hard to know which is the main channel) + + # single upstream segments + elif len(output_flows.loc[output_flows['NextDownID'] == short_id]['HydroID']) == 1: + update_id = output_flows.loc[output_flows.To_Node==from_node]['HydroID'].item() + + # no upstream segments; multiple downstream segments + elif len(output_flows.loc[output_flows.From_Node==to_node]['HydroID']) > 1: + try: + max_order = max(output_flows.loc[output_flows.From_Node==to_node]['HydroID']['order_']) # drainage area would be better than stream order but we would need to calculate + except: + print(f"To Node {to_node} cannot calculate max stream order for no upstream segments; multiple downstream segments scenario") + + if len(output_flows.loc[(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)]['HydroID']) == 1: + update_id = output_flows.loc[(output_flows.From_Node==to_node) & (output_flows['order_'] == max_order)]['HydroID'].item() + + else: + update_id = output_flows.loc[(output_flows.From_Node==to_node) & (output_flows['order_'] == max_order)]['HydroID'].values[0] # get the first one (same stream order, without drainage area info it is hard to know which is the main channel) + + # no upstream segments; single downstream segment + elif len(output_flows.loc[output_flows.From_Node==to_node]['HydroID']) == 1: + update_id = output_flows.loc[output_flows.From_Node==to_node]['HydroID'].item() + + else: + update_id = output_flows.loc[output_flows.HydroID==short_id]['HydroID'].item() + + str_order = output_flows.loc[output_flows.HydroID==short_id]['order_'].item() + sml_segs = sml_segs.append({'short_id':short_id, 'update_id':update_id, 'str_order':str_order}, ignore_index=True) + + print("Number of short reaches [{} < {} and {} < {}] = {}".format("areasqkm", min_catchment_area, "LengthKm", min_stream_length, len(sml_segs))) + # calculate src_full input_src_base = pd.read_csv(input_srcbase_fileName, dtype= object) if input_src_base.CatchId.dtype != 'int': input_src_base.CatchId = 
input_src_base.CatchId.astype(int) - input_src_base = input_src_base.merge(output_flows[['ManningN','HydroID']],left_on='CatchId',right_on='HydroID') + input_src_base = input_src_base.merge(output_flows[['ManningN','HydroID','NextDownID','order_']],left_on='CatchId',right_on='HydroID') input_src_base = input_src_base.rename(columns=lambda x: x.strip(" ")) input_src_base = input_src_base.apply(pd.to_numeric,**{'errors' : 'coerce'}) @@ -114,6 +196,21 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_src = input_src_base.drop(columns=['CatchId']) if output_src.HydroID.dtype != 'int': output_src.HydroID = output_src.HydroID.astype(int) + # update rating curves + if len(sml_segs) > 0: + + sml_segs.to_csv(small_segments_filename,index=False) + print("Update rating curves for short reaches.") + + for index, segment in sml_segs.iterrows(): + + short_id = segment[0] + update_id= segment[1] + new_values = output_src.loc[output_src['HydroID'] == update_id][['Stage', 'Discharge (m3s-1)']] + + for src_index, src_stage in new_values.iterrows(): + output_src.loc[(output_src['HydroID']== short_id) & (output_src['Stage']== src_stage[0]),['Discharge (m3s-1)']] = src_stage[1] + if extent == 'FR': output_src = output_src.merge(input_majorities[['HydroID','feature_id']],on='HydroID') elif extent == 'MS': @@ -123,20 +220,26 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True) # make hydroTable - output_hydro_table = output_src.loc[:,['HydroID','feature_id','Stage','Discharge (m3s-1)']] + output_hydro_table = output_src.loc[:,['HydroID','feature_id','NextDownID','order_','Number of Cells','SurfaceArea (m2)','BedArea (m2)','TopWidth (m)','LENGTHKM','AREASQKM','WettedPerimeter (m)','HydraulicRadius (m)','WetArea (m2)','Volume (m3)','SLOPE','ManningN','Stage','Discharge (m3s-1)']] output_hydro_table.rename(columns={'Stage' : 'stage','Discharge (m3s-1)':'discharge_cms'},inplace=True) + output_hydro_table['barc_on'] = False # set barc_on attribute to Fasle (default) --> will be overwritten if BARC module runs + output_hydro_table['vmann_on'] = False # set vmann_on attribute to Fasle (default) --> will be overwritten if variable roughness module runs + + if output_hydro_table.HydroID.dtype != 'str': output_hydro_table.HydroID = output_hydro_table.HydroID.astype(str) - output_hydro_table['HydroID'] = output_hydro_table.HydroID.str.zfill(8) - output_hydro_table['fossid'] = output_hydro_table.loc[:,'HydroID'].apply(lambda x : str(x)[0:4]) - if input_huc.fossid.dtype != 'str': input_huc.fossid = input_huc.fossid.astype(str) + output_hydro_table[FIM_ID] = output_hydro_table.loc[:,'HydroID'].apply(lambda x : str(x)[0:4]) + + if input_huc[FIM_ID].dtype != 'str': input_huc[FIM_ID] = input_huc[FIM_ID].astype(str) + output_hydro_table = output_hydro_table.merge(input_huc.loc[:,[FIM_ID,'HUC8']],how='left',on=FIM_ID) - output_hydro_table = output_hydro_table.merge(input_huc.loc[:,['fossid','HUC8']],how='left',on='fossid') if output_flows.HydroID.dtype != 'str': output_flows.HydroID = output_flows.HydroID.astype(str) - output_flows['HydroID'] = output_flows.HydroID.str.zfill(8) output_hydro_table = output_hydro_table.merge(output_flows.loc[:,['HydroID','LakeID']],how='left',on='HydroID') output_hydro_table['LakeID'] = output_hydro_table['LakeID'].astype(int) output_hydro_table = output_hydro_table.rename(columns={'HUC8':'HUC'}) - output_hydro_table.drop(columns='fossid',inplace=True) + if 
output_hydro_table.HUC.dtype != 'str': output_hydro_table.HUC = output_hydro_table.HUC.astype(str) + + output_hydro_table.drop(columns=FIM_ID,inplace=True) + if output_hydro_table.feature_id.dtype != 'int': output_hydro_table.feature_id = output_hydro_table.feature_id.astype(int) if output_hydro_table.feature_id.dtype != 'str': output_hydro_table.feature_id = output_hydro_table.feature_id.astype(str) # write out based on mode @@ -158,8 +261,8 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_src_json[str(hid)] = { 'q_list' : q_list , 'stage_list' : stage_list } # write out - output_catchments.to_file(output_catchments_fileName, driver="GPKG",index=False) - output_flows.to_file(output_flows_fileName, driver="GPKG", index=False) + output_catchments.to_file(output_catchments_fileName,driver=getDriver(output_catchments_fileName),index=False) + output_flows.to_file(output_flows_fileName,driver=getDriver(output_flows_fileName),index=False) output_src.to_csv(output_src_fileName,index=False) output_crosswalk.to_csv(output_crosswalk_fileName,index=False) output_hydro_table.to_csv(output_hydro_table_fileName,index=False) @@ -167,14 +270,15 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f with open(output_src_json_fileName,'w') as f: json.dump(output_src_json,f,sort_keys=True,indent=2) + if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Subset vector layers') + parser = argparse.ArgumentParser(description='Crosswalk for MS/FR networks; calculate synthetic rating curves; update short rating curves') parser.add_argument('-d','--input-catchments-fileName', help='DEM derived catchments', required=True) parser.add_argument('-a','--input-flows-fileName', help='DEM derived streams', required=True) parser.add_argument('-s','--input-srcbase-fileName', help='Base synthetic rating curve table', required=True) parser.add_argument('-l','--output-catchments-fileName', help='Subset crosswalked catchments', required=True) - parser.add_argument('-f','--output-flows-fileName', help='Subset crosswalked streams', required=True) + parser.add_argument('-f','--output-flows-fileName', help='Subset crosswalked streams', required=True) parser.add_argument('-r','--output-src-fileName', help='Output crosswalked synthetic rating curve table', required=True) parser.add_argument('-j','--output-src-json-fileName',help='Output synthetic rating curve json',required=True) parser.add_argument('-x','--output-crosswalk-fileName',help='Crosswalk table',required=True) @@ -185,6 +289,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f parser.add_argument('-m','--mannings-n',help='Mannings n. Accepts single parameter set or list of parameter set in calibration mode. 
Currently input as csv.',required=True) parser.add_argument('-z','--input-nwmcat-fileName',help='NWM catchment polygon',required=True) parser.add_argument('-p','--extent',help='MS or FR extent',required=True) + parser.add_argument('-k','--small-segments-filename',help='output list of short segments',required=True) parser.add_argument('-c','--calibration-mode',help='Mannings calibration flag',required=False,action='store_true') args = vars(parser.parse_args()) @@ -204,6 +309,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f mannings_n = args['mannings_n'] input_nwmcat_fileName = args['input_nwmcat_fileName'] extent = args['extent'] + small_segments_filename = args['small_segments_filename'] calibration_mode = args['calibration_mode'] - add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,calibration_mode) + add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode) diff --git a/src/adjust_headwater_streams.py b/src/adjust_headwater_streams.py new file mode 100644 index 000000000..8350311c5 --- /dev/null +++ b/src/adjust_headwater_streams.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 + +import geopandas as gpd +import pandas as pd +import numpy as np +import argparse +import pygeos +from shapely.geometry import Point,LineString +from shapely.ops import split +from shapely.wkb import dumps, loads +from utils.shared_variables import PREP_PROJECTION +from utils.shared_functions import getDriver +import warnings +warnings.simplefilter("ignore") + + +def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): + + # Identify true headwater segments + nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id > 0) & (nhd_streams.downstream_of_headwater == False),:].copy() + nhd_streams_adj = nhd_streams_adj.explode() + nhd_streams_adj = nhd_streams_adj.reset_index(drop=True) + + if nwm_headwaters["site_id"].dtype != 'int': nwm_headwaters["site_id"] = nwm_headwaters["site_id"].astype(int) + headwater_limited = nwm_headwaters.merge(nhd_streams_adj[["headwaters_id","mainstem"]],left_on="site_id", right_on="headwaters_id",how='right') + headwater_limited = headwater_limited.drop(columns=['headwaters_id']) + + nws_lid_limited = nws_lids.merge(nhd_streams[["nws_lid"]],left_on="site_id", right_on="nws_lid",how='right') + nws_lid_limited = nws_lid_limited.loc[nws_lid_limited.nws_lid!=''] + nws_lid_limited = nws_lid_limited.drop(columns=['nws_lid']) + + # Check for issues in nws_lid layer (now this reports back non-headwater nws_lids) + # if len(nws_lid_limited) < len(nws_lids): + # missing_nws_lids = list(set(nws_lids.site_id) - set(nws_lid_limited.site_id)) + # print (f"nws lid(s) {missing_nws_lids} missing from aggregate dataset in huc {huc}") + + # Combine NWM headwaters and AHPS sites to be snapped to NHDPlus HR segments + headwater_pts = headwater_limited.append(nws_lid_limited) + headwater_pts = headwater_pts.reset_index(drop=True) + + if 
headwater_pts is not None: + + headwaterstreams = [] + referencedpoints = [] + snapped_ahps = [] + nws_lid = [] + for index, point in headwater_pts.iterrows(): + + # Convert headwaterpoint geometries to WKB representation + wkb_points = dumps(point.geometry) + + # Create pygeos headwaterpoint geometries from WKB representation + pointbin_geom = pygeos.io.from_wkb(wkb_points) + + if point.pt_type == 'nwm_headwater': + # Closest segment to headwater + closest_stream = nhd_streams_adj.loc[nhd_streams_adj["headwaters_id"]==point[headwater_id]] + else: + # Closest segment to ahps site + closest_stream = nhd_streams.loc[nhd_streams["nws_lid"]==point[headwater_id]] + + try: # Seeing inconsistent geometry objects even after exploding nhd_streams_adj; not sure why this is + closest_stream =closest_stream.explode() + except: + pass + + try: + wkb_closest_stream = dumps(closest_stream.geometry[0]) + except: + wkb_closest_stream = dumps(closest_stream.geometry[0][0]) + + streambin_geom = pygeos.io.from_wkb(wkb_closest_stream) + + # Linear reference headwater to closest stream segment + pointdistancetoline = pygeos.linear.line_locate_point(streambin_geom, pointbin_geom) + referencedpoint = pygeos.linear.line_interpolate_point(streambin_geom, pointdistancetoline) + + # Convert geometries to wkb representation + bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) + + # Convert to shapely geometries + shply_referencedpoint = loads(bin_referencedpoint) + shply_linestring = loads(wkb_closest_stream) + headpoint = Point(shply_referencedpoint.coords) + + if point.pt_type == 'nwm_headwater': + cumulative_line = [] + relativedistlst = [] + + # Collect all nhd stream segment linestring verticies + for point in zip(*shply_linestring.coords.xy): + cumulative_line = cumulative_line + [point] + relativedist = shply_linestring.project(Point(point)) + relativedistlst = relativedistlst + [relativedist] + + # Add linear referenced headwater point to closest nhd stream segment + if not headpoint in cumulative_line: + cumulative_line = cumulative_line + [headpoint] + relativedist = shply_linestring.project(headpoint) + relativedistlst = relativedistlst + [relativedist] + + # Sort by relative line distance to place headwater point in linestring + sortline = pd.DataFrame({'geom' : cumulative_line, 'dist' : relativedistlst}).sort_values('dist') + shply_linestring = LineString(sortline.geom.tolist()) + referencedpoints = referencedpoints + [headpoint] + + # Split the new linestring at the new headwater point + try: + line1,line2 = split(shply_linestring, headpoint) + headwaterstreams = headwaterstreams + [LineString(line1)] + nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1) + except: + line1 = split(shply_linestring, headpoint) + headwaterstreams = headwaterstreams + [LineString(line1[0])] + nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1[0]) + + try: + del cumulative_line, relativedistlst + except: + print (f"issue deleting adjusted stream variables for huc {huc}") + + else: + snapped_ahps = snapped_ahps + [headpoint] + nws_lid = nws_lid + [point[headwater_id]] + + nhd_streams = nhd_streams.drop(columns=['is_relevant_stream', 'headwaters_id', 'downstream_of_headwater']) + + try: + del nhd_streams_adj, headwater_limited, referencedpoints, headwaterstreams + except: + print (f"issue deleting adjusted stream variables for huc {huc}") + + # Create snapped ahps sites + if len(snapped_ahps) > 0: + 
snapped_ahps_points = gpd.GeoDataFrame({'pt_type': 'nws_lid', headwater_id: nws_lid, 'mainstem': True, + 'geometry': snapped_ahps},geometry='geometry',crs=PREP_PROJECTION) + + # Identify ajusted nhd headwaters + nhd_headwater_streams_adj = nhd_streams.loc[nhd_streams['is_headwater'],:] + nhd_headwater_streams_adj = nhd_headwater_streams_adj.explode() + + hw_points = np.zeros(len(nhd_headwater_streams_adj),dtype=object) + for index,lineString in enumerate(nhd_headwater_streams_adj.geometry): + hw_point = [point for point in zip(*lineString.coords.xy)][-1] + hw_points[index] = Point(*hw_point) + + + nhd_headwater_points_adj = gpd.GeoDataFrame({'pt_type': 'NHDPlusID', headwater_id: nhd_headwater_streams_adj['NHDPlusID'], + 'mainstem': False, 'geometry': hw_points},geometry='geometry',crs=PREP_PROJECTION) + + nhd_headwater_points_adj = nhd_headwater_points_adj.reset_index(drop=True) + + del nhd_headwater_streams_adj + + try: + combined_pts = snapped_ahps_points.append(nhd_headwater_points_adj) + except: + combined_pts = nhd_headwater_points_adj.copy() + + return nhd_streams, combined_pts + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='adjust headwater stream geometery based on headwater start points') + parser.add_argument('-f','--huc',help='huc number',required=True) + parser.add_argument('-l','--nhd-streams',help='NHDPlus HR geodataframe',required=True) + parser.add_argument('-p','--nwm-headwaters',help='Headwater points layer',required=True,type=str) + parser.add_argument('-s','--subset-nhd-streams-fileName',help='Output streams layer name',required=False,type=str,default=None) + parser.add_argument('-a','--adj-headwater-points-fileName',help='Output adj headwater points layer name',required=False,type=str,default=None) + parser.add_argument('-g','--headwater-points-fileName',help='Output headwater points layer name',required=False,type=str,default=None) + parser.add_argument('-b','--nws-lids',help='NWS lid points',required=True) + parser.add_argument('-i','--headwater-id',help='Headwater id column name',required=True) + + args = vars(parser.parse_args()) + + #TODO variables below are not defined + + adj_streams_gdf, adj_headwaters_gdf = adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id) + + if subset_nhd_streams_fileName is not None: + adj_streams_gdf.to_file(args['subset_nhd_streams_fileName'],driver=getDriver(args['subset_nhd_streams_fileName'])) + + if headwater_points_fileName is not None: + headwater_points_fileName.to_file(args['headwater_points_fileName'],driver=getDriver(args['headwater_points_fileName'])) + + if adj_headwater_points_fileName is not None: + adj_headwaters_gdf.to_file(args['adj_headwater_points_fileName'],driver=getDriver(args['adj_headwater_points_fileName'])) diff --git a/lib/adjust_thalweg_lateral.py b/src/adjust_thalweg_lateral.py similarity index 77% rename from lib/adjust_thalweg_lateral.py rename to src/adjust_thalweg_lateral.py index 24b0222e2..c9930f480 100755 --- a/lib/adjust_thalweg_lateral.py +++ b/src/adjust_thalweg_lateral.py @@ -2,120 +2,114 @@ import argparse -from numba import njit, typeof, typed, types +from numba import njit, typed, types import rasterio import numpy as np +from utils.shared_functions import mem_profile -def adjust_thalweg_laterally(elevation_raster, stream_raster, allocation_raster, cost_distance_raster, cost_distance_tolerance, dem_lateral_thalweg_adj): - +@mem_profile +def adjust_thalweg_laterally(elevation_raster, stream_raster, allocation_raster, cost_distance_raster, 
cost_distance_tolerance, dem_lateral_thalweg_adj,lateral_elevation_threshold): + # ------------------------------------------- Get catchment_min_dict --------------------------------------------------- # # The following algorithm searches for the zonal minimum elevation in each pixel catchment - # It updates the catchment_min_dict with this zonal minimum elevation value. @njit def make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window, cost_tolerance, ndv): - for i,cm in enumerate(zone_window): + for i,elev_m in enumerate(zone_window): # If the zone really exists in the dictionary, compare elevation values. i = int(i) - cm = int(cm) - + elev_m = int(elev_m) + if (cost_window[i] <= cost_tolerance): if elevation_window[i] > 0: # Don't allow bad elevation values - if (cm in zone_min_dict): - - if (elevation_window[i] < zone_min_dict[cm]): + if (elev_m in zone_min_dict): + + if (elevation_window[i] < zone_min_dict[elev_m]): # If the elevation_window's elevation value is less than the zone_min_dict min, update the zone_min_dict min. - zone_min_dict[cm] = elevation_window[i] + zone_min_dict[elev_m] = elevation_window[i] else: - zone_min_dict[cm] = elevation_window[i] + zone_min_dict[elev_m] = elevation_window[i] + return(zone_min_dict) - - # Open the masked gw_catchments_pixels_masked and dem_thalwegCond_masked. + + # Open files. elevation_raster_object = rasterio.open(elevation_raster) allocation_zone_raster_object = rasterio.open(allocation_raster) cost_distance_raster_object = rasterio.open(cost_distance_raster) - + meta = elevation_raster_object.meta.copy() meta['tiled'], meta['compress'] = True, 'lzw' - + # -- Create zone_min_dict -- # - print("Create zone_min_dict") - zone_min_dict = typed.Dict.empty(types.int32,types.float32) # Initialize an empty dictionary to store the catchment minimums. + zone_min_dict = typed.Dict.empty(types.int32,types.float32) # Initialize an empty dictionary to store the catchment minimums # Update catchment_min_dict with pixel sheds minimum. - - for ji, window in elevation_raster_object.block_windows(1): # Iterate over windows, using elevation_raster_object as template. - elevation_window = elevation_raster_object.read(1,window=window).ravel() # Define elevation_window. - zone_window = allocation_zone_raster_object.read(1,window=window).ravel() # Define zone_window. - cost_window = cost_distance_raster_object.read(1, window=window).ravel() # Define cost_window. + for ji, window in elevation_raster_object.block_windows(1): # Iterate over windows, using elevation_raster_object as template + elevation_window = elevation_raster_object.read(1,window=window).ravel() # Define elevation_window + zone_window = allocation_zone_raster_object.read(1,window=window).ravel() # Define zone_window + cost_window = cost_distance_raster_object.read(1, window=window).ravel() # Define cost_window # Call numba-optimized function to update catchment_min_dict with pixel sheds minimum. 
zone_min_dict = make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window, int(cost_distance_tolerance), meta['nodata']) - - # ------------------------------------------------------------------------------------------------------------------------ # - + + # ------------------------------------------------------------------------------------------------------------------------ # + elevation_raster_object.close() allocation_zone_raster_object.close() cost_distance_raster_object.close() - + # ------------------------------------------- Assign zonal min to thalweg ------------------------------------------------ # @njit def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_window): - + # Copy elevation values into new array that will store the minimized elevation values. dem_window_to_return = np.empty_like (dem_window) dem_window_to_return[:] = dem_window - - for i,cm in enumerate(zone_window): + for i,elev_m in enumerate(zone_window): i = int(i) - cm = int(cm) + elev_m = int(elev_m) thalweg_cell = thalweg_window[i] # From flows_grid_boolean.tif (0s and 1s) if thalweg_cell == 1: # Make sure thalweg cells are checked. - if cm in zone_min_dict: - zone_min_elevation = zone_min_dict[cm] + if elev_m in zone_min_dict: + zone_min_elevation = zone_min_dict[elev_m] dem_thalweg_elevation = dem_window[i] - - elevation_difference = zone_min_elevation - dem_thalweg_elevation - - if zone_min_elevation < dem_thalweg_elevation and elevation_difference <= 5: + + elevation_difference = dem_thalweg_elevation - zone_min_elevation + + if (zone_min_elevation < dem_thalweg_elevation) and (elevation_difference <= lateral_elevation_threshold): dem_window_to_return[i] = zone_min_elevation return(dem_window_to_return) - + # Specify raster object metadata. elevation_raster_object = rasterio.open(elevation_raster) allocation_zone_raster_object = rasterio.open(allocation_raster) thalweg_object = rasterio.open(stream_raster) - dem_lateral_thalweg_adj_object = rasterio.open(dem_lateral_thalweg_adj, 'w', **meta) - - for ji, window in elevation_raster_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template. - dem_window = elevation_raster_object.read(1,window=window) # Define dem_window. + + for ji, window in elevation_raster_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template + dem_window = elevation_raster_object.read(1,window=window) # Define dem_window window_shape = dem_window.shape dem_window = dem_window.ravel() - - zone_window = allocation_zone_raster_object.read(1,window=window).ravel() # Define catchments_window. - thalweg_window = thalweg_object.read(1,window=window).ravel() # Define thalweg_window. - + + zone_window = allocation_zone_raster_object.read(1,window=window).ravel() # Define catchments_window + thalweg_window = thalweg_object.read(1,window=window).ravel() # Define thalweg_window + # Call numba-optimized function to reassign thalweg cell values to catchment minimum value. minimized_dem_window = minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_window) minimized_dem_window = minimized_dem_window.reshape(window_shape).astype(np.float32) + dem_lateral_thalweg_adj_object.write(minimized_dem_window, window=window, indexes=1) - dem_lateral_thalweg_adj_object.write(minimized_dem_window, window=window, indexes=1) - elevation_raster_object.close() allocation_zone_raster_object.close() cost_distance_raster_object.close() - - # Delete allocation_raster and distance_raster. 
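# Editor's note: adjust_thalweg_laterally above is a two-pass, block-windowed zonal minimum --
# pass 1 builds a numba typed.Dict of {zone id: minimum elevation} across raster windows,
# pass 2 lowers thalweg cells to that minimum when the drop is within
# lateral_elevation_threshold. A small self-contained sketch of the dictionary-building step
# (the arrays are made-up example data, not project rasters):
import numpy as np
from numba import njit, typed, types

@njit
def zone_minimums(elev, zones, zone_min):
    for i in range(elev.size):
        z = int(zones[i])
        if elev[i] > 0:                                  # skip nodata / bad elevations
            if z not in zone_min or elev[i] < zone_min[z]:
                zone_min[z] = elev[i]
    return zone_min

zone_min = typed.Dict.empty(types.int64, types.float32)
elev = np.array([5.0, 3.0, 7.0, 2.0], dtype=np.float32)
zones = np.array([1, 1, 2, 2], dtype=np.int64)
zone_minimums(elev, zones, zone_min)                     # {1: 3.0, 2: 2.0}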
- - - + + if __name__ == '__main__': - + # Parse arguments. parser = argparse.ArgumentParser(description='Adjusts the elevation of the thalweg to the lateral zonal minimum.') parser.add_argument('-e','--elevation_raster',help='Raster of elevation.',required=True) @@ -124,11 +118,9 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_w parser.add_argument('-d','--cost_distance_raster',help='Raster of cost distances for the allocation raster.',required=True) parser.add_argument('-t','--cost_distance_tolerance',help='Tolerance in meters to use when searching for zonal minimum.',required=True) parser.add_argument('-o','--dem_lateral_thalweg_adj',help='Output elevation raster with adjusted thalweg.',required=True) - + parser.add_argument('-th','--lateral_elevation_threshold',help='Maximum difference between current thalweg elevation and lowest lateral elevation in meters.',required=True,type=int) + # Extract to dictionary and assign to variables. args = vars(parser.parse_args()) - + adjust_thalweg_laterally(**args) - - - diff --git a/src/aggregate_fim_outputs.py b/src/aggregate_fim_outputs.py new file mode 100644 index 000000000..7725abc4f --- /dev/null +++ b/src/aggregate_fim_outputs.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 + +import os +import argparse +from multiprocessing import Pool +import pandas as pd +import json +import rasterio +from rasterio.merge import merge +from rasterio.warp import calculate_default_transform, reproject, Resampling +import shutil +import csv +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION + + +def aggregate_fim_outputs(args): + + fim_out_dir = args[0] + huc6 = args[1] + huc_list = args[2] + + print(f"aggregating {huc6}") + + huc6_dir = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6)) + os.makedirs(huc6_dir, exist_ok=True) + + # aggregate file name paths + aggregate_hydrotable = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6),'hydroTable.csv') + aggregate_src = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6),f'rating_curves_{huc6}.json') + + for huc in huc_list: + + # original file paths + hydrotable_filename = os.path.join(fim_out_dir,huc,'hydroTable.csv') + src_filename = os.path.join(fim_out_dir,huc,'src.json') + + if len(huc)> 6: + + # open hydrotable + hydrotable = pd.read_csv(hydrotable_filename) + + # write/append aggregate hydrotable + if os.path.isfile(aggregate_hydrotable): + hydrotable.to_csv(aggregate_hydrotable,index=False, mode='a',header=False) + else: + hydrotable.to_csv(aggregate_hydrotable,index=False) + + del hydrotable + + # open src + src = open(src_filename) + src = json.load(src) + + # write/append aggregate src + if os.path.isfile(aggregate_src): + + with open(aggregate_src, "r+") as file: + data = json.load(file) + data.update(src) + + with open(aggregate_src, 'w') as outfile: + json.dump(data, outfile) + else: + with open(aggregate_src, 'w') as outfile: + json.dump(src, outfile) + + del src + + else: + shutil.copy(hydrotable_filename, aggregate_hydrotable) + shutil.copy(src_filename, aggregate_src) + + ## add feature_id to aggregate src + # Open aggregate src for writing feature_ids to + src_data = {} + with open(aggregate_src) as jsonf: + src_data = json.load(jsonf) + + with open(aggregate_hydrotable) as csvf: + csvReader = csv.DictReader(csvf) + + for row in csvReader: + if row['HydroID'].lstrip('0') in src_data and 'nwm_feature_id' not in src_data[row['HydroID'].lstrip('0')]: + src_data[row['HydroID'].lstrip('0')]['nwm_feature_id'] = row['feature_id'] 
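# Editor's note: the lstrip('0') used just above appears to reconcile zero-padded HydroID
# strings read from hydroTable.csv with the unpadded keys stored in src.json; a tiny
# illustration under that assumption (values are invented):
src_data = {'1234567': {'q_list': [], 'stage_list': []}}
row = {'HydroID': '01234567', 'feature_id': '888000'}

key = row['HydroID'].lstrip('0')                         # '01234567' -> '1234567'
if key in src_data and 'nwm_feature_id' not in src_data[key]:
    src_data[key]['nwm_feature_id'] = row['feature_id']
# src_data -> {'1234567': {'q_list': [], 'stage_list': [], 'nwm_feature_id': '888000'}}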
+ + # Write src_data to JSON file + with open(aggregate_src, 'w') as jsonf: + json.dump(src_data, jsonf) + + ## aggregate rasters + # aggregate file paths + rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}_prepprj.tif') + catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}_prepprj.tif') + + if huc6 not in huc_list: + + huc6_filter = [path.startswith(huc6) for path in huc_list] + subset_huc6_list = [i for (i, v) in zip(huc_list, huc6_filter) if v] + + # aggregate and mosaic rem + rem_list = [os.path.join(fim_out_dir,huc,'rem_zeroed_masked.tif') for huc in subset_huc6_list] + + if len(rem_list) > 1: + + rem_files_to_mosaic = [] + + for rem in rem_list: + + rem_src = rasterio.open(rem) + rem_files_to_mosaic.append(rem_src) + + mosaic, out_trans = merge(rem_files_to_mosaic) + out_meta = rem_src.meta.copy() + out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) + + with rasterio.open(rem_mosaic, "w", **out_meta, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dest: + dest.write(mosaic) + + del rem_files_to_mosaic,rem_src,out_meta,mosaic + + elif len(rem_list)==1: + + shutil.copy(rem_list[0], rem_mosaic) + + # aggregate and mosaic catchments + catchment_list = [os.path.join(fim_out_dir,huc,'gw_catchments_reaches_filtered_addedAttributes.tif') for huc in subset_huc6_list] + + if len(catchment_list) > 1: + + cat_files_to_mosaic = [] + + for cat in catchment_list: + cat_src = rasterio.open(cat) + cat_files_to_mosaic.append(cat_src) + + mosaic, out_trans = merge(cat_files_to_mosaic) + out_meta = cat_src.meta.copy() + + out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) + + with rasterio.open(catchment_mosaic, "w", **out_meta, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dest: + dest.write(mosaic) + + del cat_files_to_mosaic,cat_src,out_meta,mosaic + + elif len(catchment_list)==1: + + shutil.copy(catchment_list[0], catchment_mosaic) + + else: + # original file paths + rem_filename = os.path.join(fim_out_dir,huc6,'rem_zeroed_masked.tif') + catchment_filename = os.path.join(fim_out_dir,huc6,'gw_catchments_reaches_filtered_addedAttributes.tif') + + shutil.copy(rem_filename, rem_mosaic) + shutil.copy(catchment_filename, catchment_mosaic) + + ## reproject rasters + reproject_raster(rem_mosaic,VIZ_PROJECTION) + os.remove(rem_mosaic) + + reproject_raster(catchment_mosaic,VIZ_PROJECTION) + os.remove(catchment_mosaic) + + +def reproject_raster(raster_name,reprojection): + + with rasterio.open(raster_name) as src: + transform, width, height = calculate_default_transform( + src.crs, reprojection, src.width, src.height, *src.bounds) + kwargs = src.meta.copy() + kwargs.update({ + 'crs': reprojection, + 'transform': transform, + 'width': width, + 'height': height, + 'compress': 'lzw' + }) + + raster_proj_rename = os.path.split(raster_name)[1].replace('_prepprj.tif', '.tif') + raster_proj_dir = os.path.join(os.path.dirname(raster_name), raster_proj_rename) + + with rasterio.open(raster_proj_dir, 'w', **kwargs, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dst: + # for i in range(1, src.count + 1): + reproject( + source=rasterio.band(src, 1), + destination=rasterio.band(dst, 1), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=transform, + dst_crs=reprojection, + 
resampling=Resampling.nearest) + del src, dst + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Aggregate layers buy HUC6') + parser.add_argument('-d','--fim-outputs-directory', help='FIM outputs directory', required=True) + parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) + + + args = vars(parser.parse_args()) + + fim_outputs_directory = args['fim_outputs_directory'] + number_of_jobs = int(args['number_of_jobs']) + + drop_folders = ['logs'] + huc_list = [huc for huc in os.listdir(fim_outputs_directory) if huc not in drop_folders] + huc6_list = [str(huc[0:6]) for huc in os.listdir(fim_outputs_directory) if huc not in drop_folders] + huc6_list = list(set(huc6_list)) + + procs_list = [] + + for huc6 in huc6_list: + + limited_huc_list = [huc for huc in huc_list if huc.startswith(huc6)] + + procs_list.append([fim_outputs_directory,huc6,limited_huc_list]) + + print(f"aggregating {len(huc_list)} hucs to HUC6 scale using {number_of_jobs} jobs") + with Pool(processes=number_of_jobs) as pool: + pool.map(aggregate_fim_outputs, procs_list) diff --git a/lib/aggregate_fim_outputs.sh b/src/aggregate_fim_outputs.sh similarity index 100% rename from lib/aggregate_fim_outputs.sh rename to src/aggregate_fim_outputs.sh diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py new file mode 100755 index 000000000..aa47342b8 --- /dev/null +++ b/src/aggregate_vector_inputs.py @@ -0,0 +1,401 @@ +#!/usr/bin/env python3 + +import os +# sys.path.append('/foss_fim/src') +import geopandas as gpd +from utils.shared_variables import PREP_PROJECTION +from utils.shared_functions import getDriver +from reduce_nhd_stream_density import subset_nhd_network +from adjust_headwater_streams import adjust_headwaters +from shapely.geometry import Point +from concurrent.futures import ProcessPoolExecutor +from collections import deque +import numpy as np +from shapely.wkb import dumps, loads +import pygeos + +nhdplus_vectors_dir = os.environ.get('nhdplus_vectors_dir') +wbd_filename = os.environ.get('wbd_filename') +nwm_streams_orig_filename = os.environ.get('nwm_streams_orig_filename') +nwm_streams_all_filename = os.environ.get('nwm_streams_all_filename') +nwm_headwaters_filename = os.environ.get('nwm_headwaters_filename') +nwm_catchments_orig_filename = os.environ.get('nwm_catchments_orig_filename') +nwm_catchments_all_filename = os.environ.get('nwm_catchments_all_filename') +ahps_filename = os.environ.get('ahps_filename') +nwm_huc4_intersections_filename = os.environ.get('nwm_huc4_intersections_filename') +nhd_huc8_intersections_filename = os.environ.get('nhd_huc8_intersections_filename') +agg_nhd_headwaters_adj_fileName = os.environ['agg_nhd_headwaters_adj_fileName'] +agg_nhd_streams_adj_fileName = os.environ['agg_nhd_streams_adj_fileName'] + + +def identify_nwm_ms_streams(nwm_streams_filename,ahps_filename,nwm_streams_all_filename): + + # Subset nwm network to ms + ahps_headwaters = gpd.read_file(ahps_filename) + + nwm_streams = gpd.read_file(nwm_streams_filename) + + # Remove mainstem column if it already exists + nwm_streams = nwm_streams.drop(['mainstem'], axis=1, errors='ignore') + + nwm_streams['is_headwater'] = False + + nwm_streams.loc[nwm_streams.ID.isin(list(ahps_headwaters.nwm_featur)),'is_headwater'] = True + + # Subset NHDPlus HR + nwm_streams['is_relevant_stream'] = nwm_streams['is_headwater'].copy() + + nwm_streams = nwm_streams.explode() + + # Trace down from headwaters + 
nwm_streams.set_index('ID',inplace=True,drop=False) + + Q = deque(nwm_streams.loc[nwm_streams['is_headwater'],'ID'].tolist()) + visited = set() + + while Q: + q = Q.popleft() + if q in visited: + continue + + visited.add(q) + toNode = nwm_streams.loc[q,'to'] + + if not toNode == 0: + + nwm_streams.loc[nwm_streams.ID==toNode,'is_relevant_stream'] = True + + if toNode not in visited: + Q.append(toNode) + + nwm_streams_ms = nwm_streams.loc[nwm_streams['is_relevant_stream'],:] + ms_segments = nwm_streams_ms.ID.to_list() + + nwm_streams.reset_index(drop=True,inplace=True) + + # Add column to FR nwm layer to indicate MS segments + nwm_streams['mainstem'] = np.where(nwm_streams.ID.isin(ms_segments), 1, 0) + + nwm_streams = nwm_streams.drop(['is_relevant_stream','is_headwater'], axis=1, errors='ignore') + + nwm_streams.to_file(nwm_streams_all_filename,driver=getDriver(nwm_streams_all_filename),index=False,layer='nwm_streams') + + return ms_segments + + +def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit): + + # Input wbd + if isinstance(wbd,str): + + layer = f"WBDHU{huc_unit}" + wbd = gpd.read_file(wbd, layer=layer) + elif isinstance(wbd,gpd.GeoDataFrame): + pass + else: + raise TypeError("Pass dataframe or filepath for wbd") + + intersecting_points = [] + nhdplus_ids = [] + mainstem_flag = [] + print (f"iterating through {len(wbd)} hucs") + for index, row in wbd.iterrows(): + + col_name = f"HUC{huc_unit}" + huc = row[col_name] + huc_mask = wbd.loc[wbd[col_name]==str(huc)] + huc_mask = huc_mask.explode() + huc_mask = huc_mask.reset_index(drop=True) + + # Input nwm streams + if isinstance(nwm_streams_,str): + nwm_streams = gpd.read_file(nwm_streams_, mask=huc_mask) + elif isinstance(nwm_streams_,gpd.GeoDataFrame): + nwm_streams = nwm_streams_.copy() + else: + raise TypeError("Pass dataframe or filepath for nwm streams") + + nwm_streams = nwm_streams.explode() + nwm_streams = nwm_streams.reset_index(drop=True) + + for index, polygon in enumerate(huc_mask.geometry): + + crosses=nwm_streams.crosses(polygon.exterior) + nwm_streams_subset =nwm_streams[crosses] + nwm_streams_subset = nwm_streams_subset.reset_index(drop=True) + + for index, segment in nwm_streams_subset.iterrows(): + distances = [] + + try: + nhdplus_id = segment.ID + except: + nhdplus_id = segment.NHDPlusID + + linestring = segment.geometry + mainstem = segment.mainstem + + # Distance to each stream segment + for point in zip(*linestring.coords.xy): + distance = Point(point).distance(polygon.exterior) + distances = distances + [distance] + + # Find minimum distance + min_index = np.argmin(distances) + + # Closest segment to headwater + closest_point = list(linestring.coords)[min_index] + last_node = Point(closest_point) + + # Convert geometries to WKB representation + wkb_point = dumps(last_node) + wkb_poly = dumps(polygon.exterior) + + # Create pygeos geometries from WKB representation + stream_point_geom = pygeos.io.from_wkb(wkb_point) + polybin_geom = pygeos.io.from_wkb(wkb_poly) + + # Linear reference end node to huc boundary + pointdistancetoline = pygeos.linear.line_locate_point(polybin_geom,stream_point_geom) + referencedpoint = pygeos.linear.line_interpolate_point(polybin_geom, pointdistancetoline) + + # Convert geometries to wkb representation + bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) + + # Convert to shapely geometries + shply_referencedpoint = loads(bin_referencedpoint) + + # Collect all nhd stream segment linestring verticies + intersecting_points = intersecting_points + [shply_referencedpoint] + 
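Editor's sketch: the snapping logic above linearly references the closest stream vertex onto the HUC boundary with pygeos. A minimal illustration of the same two calls using a simple (hypothetical) boundary line and an off-line point.

import pygeos

# Hypothetical boundary edge and a stream endpoint lying just inside it
boundary = pygeos.linestrings([(0, 0), (10, 0)])
stream_end = pygeos.points(4.3, 0.7)

# Distance along the boundary to the closest position, then the snapped point itself
dist_along = pygeos.linear.line_locate_point(boundary, stream_end)
snapped = pygeos.linear.line_interpolate_point(boundary, dist_along)

print(dist_along)              # 4.3
print(pygeos.to_wkt(snapped))  # POINT (4.3 0)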
nhdplus_ids = nhdplus_ids + [nhdplus_id] + mainstem_flag = mainstem_flag + [mainstem] + + del huc_mask + + huc_intersection = gpd.GeoDataFrame({'geometry': intersecting_points, 'NHDPlusID': nhdplus_ids,'mainstem': mainstem_flag},crs=nwm_streams.crs,geometry='geometry') + huc_intersection = huc_intersection.drop_duplicates() + + del nwm_streams,wbd + + return huc_intersection + + +def collect_stream_attributes(nhdplus_vectors_dir, huc): + + print (f"Starting attribute collection for HUC {huc}",flush=True) + + # Collecting NHDPlus HR attributes + burnline_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg') + vaa_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusFlowLineVAA' + str(huc) + '.gpkg') + flowline_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDFlowline' + str(huc) + '.gpkg') + + if os.path.exists(os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg')): + + burnline = gpd.read_file(burnline_filename) + burnline = burnline[['NHDPlusID','ReachCode','geometry']] + flowline = gpd.read_file(flowline_filename) + flowline = flowline[['NHDPlusID','FType','FCode']] + # flowline = flowline.loc[flowline["FType"].isin([334,420,428,460,558])] + flowline = flowline.loc[~flowline["FType"].isin([566,420])] + + nhd_streams_vaa = gpd.read_file(vaa_filename) + nhd_streams_vaa = nhd_streams_vaa[['FromNode','ToNode','NHDPlusID','StreamOrde','DnLevelPat','LevelPathI']] + nhd_streams = burnline.merge(nhd_streams_vaa,on='NHDPlusID',how='inner') + nhd_streams = nhd_streams.merge(flowline,on='NHDPlusID',how='inner') + + del burnline, flowline, nhd_streams_vaa + + nhd_streams = nhd_streams.to_crs(PREP_PROJECTION) + nhd_streams = nhd_streams.loc[nhd_streams.geometry!=None,:] # special case: remove segments without geometries + nhd_streams['HUC4'] = str(huc) + + # special case; breach in network at Tiber Dam + if huc == '1003' and nhd_streams.loc[nhd_streams.NHDPlusID==23001300078682.0,'DnLevelPat'] == 23001300001574.0: + nhd_streams = nhd_streams.loc[nhd_streams.NHDPlusID!=23001300009084.0] + nhd_streams.loc[nhd_streams.NHDPlusID==23001300078682.0,'DnLevelPat'] = 23001300001566.0 + + # Write out NHDPlus HR aggregated + nhd_streams_agg_fileName = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + nhd_streams.to_file(nhd_streams_agg_fileName,driver=getDriver(nhd_streams_agg_fileName),index=False) + + del nhd_streams + + print (f"finished attribute collection for HUC {huc}",flush=True) + + else: + print (f"missing data for HUC {huc}",flush=True) + + +def subset_stream_networks(args, huc): + + nwm_headwaters_filename = args[0] + ahps_filename = args[1] + wbd4 = args[2] + wbd8 = args[3] + nhdplus_vectors_dir = args[4] + nwm_huc4_intersections_filename = args[5] + + print(f"starting stream subset for HUC {huc}",flush=True) + nwm_headwater_id = 'ID' + ahps_headwater_id = 'nws_lid' + headwater_pts_id = 'site_id' + + column_order = ['pt_type', headwater_pts_id, 'mainstem', 'geometry'] + nhd_streams_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + + # Subset to reduce footprint + selected_wbd4 = wbd4.loc[wbd4.HUC4.str.startswith(huc)] + del wbd4 + selected_wbd8 = wbd8.loc[wbd8.HUC8.str.startswith(huc)] + del wbd8 + + huc_mask = selected_wbd4.loc[selected_wbd4.HUC4.str.startswith(huc)] + huc_mask = huc_mask.explode() + huc_mask = huc_mask.reset_index(drop=True) + + if len(selected_wbd8.HUC8) > 0: + + selected_wbd8 = selected_wbd8.reset_index(drop=True) + + # 
Identify FR/NWM headwaters and subset HR network + try: + nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersections_filename) + except: + print (f"Error subsetting NHD HR network for HUC {huc}",flush=True) + + # Identify nhd mainstem streams + try: + nhd_streams_all = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_fr,ahps_filename,ahps_headwater_id,nwm_huc4_intersections_filename,True) + except: + print (f"Error identifing MS network for HUC {huc}",flush=True) + + # Identify HUC8 intersection points + nhd_huc8_intersections = find_nwm_incoming_streams(nhd_streams_all,selected_wbd8,8) + + # Load nwm headwaters + nwm_headwaters = gpd.read_file(nwm_headwaters_filename, mask=huc_mask) + nwm_headwaters['pt_type'] = 'nwm_headwater' + nwm_headwaters = nwm_headwaters.rename(columns={"ID": headwater_pts_id}) + + # Load nws lids + nws_lids = gpd.read_file(ahps_filename, mask=huc_mask) + nws_lids = nws_lids.drop(columns=['name','nwm_feature_id','usgs_site_code','states','HUC8','is_headwater', 'is_colocated']) + nws_lids = nws_lids.rename(columns={"nws_lid": headwater_pts_id}) + nws_lids['pt_type'] = 'nws_lid' + nws_lids['mainstem'] = True + + if (len(nwm_headwaters) > 0) or (len(nws_lids) > 0): + + # Adjust FR/NWM headwater segments + adj_nhd_streams_all, adj_nhd_headwater_points = adjust_headwaters(huc,nhd_streams_all,nwm_headwaters,nws_lids,headwater_pts_id) + + adj_nhd_headwater_points = adj_nhd_headwater_points[column_order] + nhd_huc8_intersections['pt_type'] = 'nhd_huc8_intersections' + nhd_huc8_intersections = nhd_huc8_intersections.rename(columns={"NHDPlusID": headwater_pts_id}) + nhd_huc8_intersections = nhd_huc8_intersections[column_order] + adj_nhd_headwater_points_all = adj_nhd_headwater_points.append(nhd_huc8_intersections) + adj_nhd_headwater_points_all = adj_nhd_headwater_points_all.reset_index(drop=True) + + adj_nhd_streams_all_fileName = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + adj_nhd_headwaters_all_fileName = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg') + + # Write out FR adjusted + adj_nhd_streams_all.to_file(adj_nhd_streams_all_fileName,driver=getDriver(adj_nhd_streams_all_fileName),index=False) + adj_nhd_headwater_points_all.to_file(adj_nhd_headwaters_all_fileName,driver=getDriver(adj_nhd_headwaters_all_fileName),index=False) + + del adj_nhd_streams_all, adj_nhd_headwater_points_all + + else: + + print (f"skipping headwater adjustments for HUC {huc}") + + del nhd_streams_fr + + print(f"finished stream subset for HUC {huc}",flush=True) + + +def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list): + + for huc in huc_list: + + # aggregated final filenames + nhd_agg_adj_huc_subset = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + nhd_agg_adj_headwaters_subset = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg') + + if os.path.isfile(nhd_agg_adj_huc_subset): + adj_nhd_streams_all = gpd.read_file(nhd_agg_adj_huc_subset) + + # Write out FR adjusted + if os.path.isfile(agg_nhd_streams_adj_fileName): + adj_nhd_streams_all.to_file(agg_nhd_streams_adj_fileName,driver=getDriver(agg_nhd_streams_adj_fileName),index=False, mode='a') + else: + adj_nhd_streams_all.to_file(agg_nhd_streams_adj_fileName,driver=getDriver(agg_nhd_streams_adj_fileName),index=False) + + del 
adj_nhd_streams_all + + if os.path.isfile(nhd_agg_adj_headwaters_subset): + adj_nhd_headwater_points_all = gpd.read_file(nhd_agg_adj_headwaters_subset) + + # Write out FR adjusted + if os.path.isfile(agg_nhd_headwaters_adj_fileName): + adj_nhd_headwater_points_all.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False, mode='a') + else: + adj_nhd_headwater_points_all.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False) + + del adj_nhd_headwater_points_all + + +def clean_up_intermediate_files(nhdplus_vectors_dir): + + for huc in os.listdir(nhdplus_vectors_dir): + + agg_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + streams_adj_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + headwater_adj_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg') + + if os.path.exists(agg_path): + os.remove(agg_path) + + if os.path.exists(streams_adj_path): + os.remove(streams_adj_path) + + if os.path.exists(headwater_adj_path): + os.remove(headwater_adj_path) + + +if __name__ == '__main__': + + # # Generate NWM Headwaters + + print ('loading HUC4s') + wbd4 = gpd.read_file(wbd_filename, layer='WBDHU4') + print ('loading HUC8s') + wbd8 = gpd.read_file(wbd_filename, layer='WBDHU8') + + subset_arg_list = (nwm_headwaters_filename,ahps_filename,wbd4,wbd8,nhdplus_vectors_dir,nwm_huc4_intersections_filename) + huc_list = os.listdir(nhdplus_vectors_dir) + + missing_subsets = [] + for huc in os.listdir(nhdplus_vectors_dir): + streams_adj_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + if not os.path.isfile(streams_adj_path): + missing_subsets = missing_subsets + [huc] + + + print (f"Subsetting stream network for {len(missing_subsets)} HUC4s") + num_workers=11 + + with ProcessPoolExecutor(max_workers=num_workers) as executor: + # Preprocess nhd hr and add attributes + # collect_attributes = [executor.submit(collect_stream_attributes, nhdplus_vectors_dir, str(huc)) for huc in huc_list] + # Subset nhd hr network + subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in missing_subsets] + + del wbd4,wbd8 + + # Aggregate subset nhd networks for entire nwm domain + print ('Aggregating subset NHD networks for entire NWM domain') + aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,missing_subsets) + + # Remove intermediate files + # clean_up_intermediate_files(nhdplus_vectors_dir) diff --git a/lib/agreedem.py b/src/agreedem.py similarity index 99% rename from lib/agreedem.py rename to src/agreedem.py index 15ae40c4c..7d15b7f80 100755 --- a/lib/agreedem.py +++ b/src/agreedem.py @@ -4,8 +4,10 @@ import os import argparse from r_grow_distance import r_grow_distance +from utils.shared_functions import mem_profile +@mem_profile def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buffer_dist, smooth_drop, sharp_drop, delete_intermediate_data): ''' Produces a hydroconditioned raster using the AGREE DEM methodology as described by Ferdi Hellweger (https://www.caee.utexas.edu/prof/maidment/gishydro/ferdi/research/agree/agree.html). The GRASS gis tool r.grow.distance is used to calculate intermediate allocation and proximity rasters. 
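Editor's sketch: this PR decorates several functions with @mem_profile from utils.shared_functions, whose implementation is not shown in this hunk. As a rough illustration of what such a decorator can look like, a sketch built on tracemalloc; the real helper may measure or log differently.

import functools
import tracemalloc

def mem_profile(func):
    # Report peak memory allocated while the wrapped function runs
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        tracemalloc.start()
        try:
            return func(*args, **kwargs)
        finally:
            _, peak = tracemalloc.get_traced_memory()
            tracemalloc.stop()
            print(f"{func.__name__}: peak memory {peak / 1024**2:.1f} MiB")
    return wrapper

@mem_profile
def example():
    return [0] * 1_000_000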
@@ -45,8 +47,6 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff # Import dem layer and river layer and get dem profile. elev = rasterio.open(dem) dem_profile = elev.profile - if elev.nodata == 0.0: - dem_profile.update(nodata = -999) rivers = rasterio.open(rivers_raster) diff --git a/lib/bash_functions.env b/src/bash_functions.env similarity index 100% rename from lib/bash_functions.env rename to src/bash_functions.env diff --git a/src/bathy_src_adjust_topwidth.py b/src/bathy_src_adjust_topwidth.py new file mode 100644 index 000000000..dc2c94dd9 --- /dev/null +++ b/src/bathy_src_adjust_topwidth.py @@ -0,0 +1,340 @@ +#!/usr/bin/env python3 + +import sys +import os +import pandas as pd +import numpy as np +import argparse +import matplotlib.pyplot as plt +import seaborn as sns +from multiprocessing import Pool +from os.path import isfile, join, dirname, isdir +import datetime as dt +import sys +from os import environ +sns.set_theme(style="whitegrid") +# from utils.shared_functions import mem_profile + +""" + Estimate feature_id missing bathymetry in the raw channel geometry using input bankfull regression geometry + + Parameters + ---------- + fim_dir : str + Directory containing FIM output folders. + bankfull_geom : str + Input regression dataset w/ bankfull geometry by featureid (topwidth & xsec area) + number_of_jobs : int + Number of jobs. + src_plot_option : str + Optional (True or False): use this flag to crate src plots for all hydroids +""" + +# sa_ratio_flag = 10 +# thal_stg_limit = 3 +# bankful_xs_ratio_flag = 10 +# bathy_xsarea_flag = 1 +# thal_hyd_radius_flag = 10 +# ignore_streamorder = 10 +sa_ratio_flag = float(environ['surf_area_thalweg_ratio_flag']) #10x --> Flag: Surface area ratio value to identify possible thalweg notch "jump" (SA x+1 / SA x) +thal_stg_limit = float(environ['thalweg_stg_search_max_limit']) #3m --> Threshold: Stage value limit below which to look for the surface area ratio flag (only flag thalweg notch below this threshold) +bankful_xs_ratio_flag = float(environ['bankful_xs_area_ratio_flag']) #10x --> Flag: Identify bogus BARC adjusted values where the regression bankfull XS Area/SRC bankfull area is > threshold (topwidth crosswalk issues or bad bankfull regression data points??) +bathy_xsarea_flag = float(environ['bathy_xs_area_chg_flag']) #1x --> Flag: Cross section area limit to cap the amount of bathy XS area added to the SRC. Limits the bathy_calc_xs_area/ BANKFULL_XSEC_AREA to the specified threshold +thal_hyd_radius_flag = float(environ['thalweg_hyd_radius_flag']) #10x --> Flag: Idenitify possible erroneous BARC-adjusted hydraulic radius values. 
BARC discharge values greater than the specified threshold and within the thal_stg_limit are set to 0 +ignore_streamorder = int(environ['ignore_streamorders']) #10 --> Do not perform BARC for streamorders >= provided value + +def bathy_rc_lookup(args): + input_src_fileName = args[0] + df_bfull_geom = args[1] + output_bathy_fileName = args[2] + output_bathy_streamorder_fileName = args[3] + output_bathy_thalweg_fileName = args[4] + output_bathy_xs_lookup_fileName = args[5] + input_htable_fileName = args[6] + out_src_filename = args[7] + huc = args[8] + src_plot_option = args[9] + huc_plot_output_dir = args[10] + + log_text = 'Calculating: ' + str(huc) + '\n' + + ## Read in the default src_full_crosswalked.csv + input_src_base = pd.read_csv(input_src_fileName, dtype= {'feature_id': int}) + + ## Rename input bankfull_geom data columns for consistant referencing + df_bfull_geom = df_bfull_geom.rename(columns={'COMID':'feature_id','BANKFULL_WIDTH':'BANKFULL_WIDTH (m)','BANKFULL_XSEC_AREA':'BANKFULL_XSEC_AREA (m2)'}) + df_bfull_geom = df_bfull_geom.rename(columns={'BANKFULL_TOPWIDTH_q':'BANKFULL_WIDTH (m)','BANKFULL_XSEC_AREA_q':'BANKFULL_XSEC_AREA (m2)'}) + ## Merge input_bathy and modified_src_base df using feature_id/COMID attributes + modified_src_base = input_src_base.merge(df_bfull_geom.loc[:,['feature_id','BANKFULL_WIDTH (m)','BANKFULL_XSEC_AREA (m2)']],how='left',on='feature_id') + + ## Check that the merge process returned matching feature_id entries + if modified_src_base['BANKFULL_WIDTH (m)'].count() == 0: + log_text += 'WARNING: No matching feature_id found between input bathy data and src_base --> No bathy calculations added to SRC for huc ' + str(huc) + '\n' + else: + ## Use SurfaceArea variable to identify thalweg-restricted stage values for each hydroid + ## Calculate the interrow SurfaceArea ratio n/(n-1) + modified_src_base['SA_div_flag'] = modified_src_base['SurfaceArea (m2)'].div(modified_src_base['SurfaceArea (m2)'].shift(1)) + ## Mask SA_div_flag when Stage = 0 or when the SA_div_flag value (n / n-1) is > threshold value (i.e. 10x) + modified_src_base['SA_div_flag'].mask((modified_src_base['Stage']==0) | (modified_src_base['SA_div_flag'] this is used to mask the discharge after Manning's equation + modified_src_base = modified_src_base.merge(find_thalweg_notch.loc[:,['HydroID','Thalweg_burn_elev']],how='left',on='HydroID') + + ## Calculate bankfull vs top width difference for each feature_id + modified_src_base['Top Width Diff (m)'] = (modified_src_base['TopWidth (m)'] - modified_src_base['BANKFULL_WIDTH (m)']).abs() + ## Calculate XS Area field (Channel Volume / Stream Length) + modified_src_base['XS Area (m2)'] = modified_src_base['Volume (m3)'] / (modified_src_base['LENGTHKM'] * 1000) + + ## Groupby HydroID and find min of Top Width Diff (m) + output_bathy = modified_src_base[['feature_id','HydroID','order_','Stage','SurfaceArea (m2)','Thalweg_burn_elev','BANKFULL_WIDTH (m)','TopWidth (m)','XS Area (m2)','BANKFULL_XSEC_AREA (m2)','Top Width Diff (m)']] + ## filter out stage = 0 rows in SRC (assuming geom at stage 0 is not a valid channel geom) + output_bathy = output_bathy[output_bathy['Stage'] > 0] + ## filter SRC rows identified as Thalweg burned + output_bathy['Top Width Diff (m)'].mask(output_bathy['Stage'] <= output_bathy['Thalweg_burn_elev'],inplace=True) + ## ignore hydroid/featureid that did not have a valid Bankfull lookup (areas outside CONUS - i.e. 
Canada) + output_bathy = output_bathy[output_bathy['BANKFULL_XSEC_AREA (m2)'].notnull()] + ## ignore SRC entries with 0 surface area --> handles input SRC artifacts/errors in Great Lakes region + output_bathy = output_bathy[output_bathy['SurfaceArea (m2)'] > 0] + ## find index of minimum top width difference --> this will be used as the SRC "bankfull" row for future calcs + output_bathy = output_bathy.loc[output_bathy.groupby('HydroID')['Top Width Diff (m)'].idxmin()].reset_index(drop=True) + log_text += ('Average: bankfull width crosswalk difference (m): ' + str(output_bathy['Top Width Diff (m)'].mean())) + '\n' + log_text += ('Minimum: bankfull width crosswalk difference (m): ' + str(output_bathy['Top Width Diff (m)'].min())) + '\n' + log_text += ('Maximum: bankfull width crosswalk difference (m): ' + str(output_bathy['Top Width Diff (m)'].max())) + '\n' + log_text += ('STD: bankfull width crosswalk difference (m): ' + str(output_bathy['Top Width Diff (m)'].std())) +'\n' + + ## Calculate XS Area difference between SRC and Bankfull database + output_bathy['XS Area Diff (m2)'] = (output_bathy['BANKFULL_XSEC_AREA (m2)'] - output_bathy['XS Area (m2)']) + output_bathy['XS Bankfull Area Ratio'] = (output_bathy['BANKFULL_XSEC_AREA (m2)'] / output_bathy['XS Area (m2)']).round(2) + ## masking negative XS Area Diff and XS Area = 0 + output_bathy['XS Bankfull Area Ratio'].mask((output_bathy['XS Area Diff (m2)']<0) | (output_bathy['XS Area (m2)'] == 0),inplace=True) + ## masking negative XS Area Diff and XS Area = 0 + output_bathy['XS Area Diff (m2)'].mask((output_bathy['XS Area Diff (m2)']<0) | (output_bathy['XS Area (m2)'] == 0),inplace=True) + ## remove bogus values where bankfull area ratio > threshold --> 10x (topwidth crosswalk issues or bad bankfull regression data points??) + output_bathy['XS Area Diff (m2)'].mask(output_bathy['XS Bankfull Area Ratio']>bankful_xs_ratio_flag,inplace=True) + ## remove bogus values where bankfull area ratio > threshold --> 10x (topwidth crosswalk issues or bad bankfull regression data points??) 
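Editor's sketch: the bankfull-row selection above uses the pandas groupby/idxmin pattern, keeping for each HydroID the stage row whose top width is closest to the regression bankfull width. A self-contained example of that pattern on made-up numbers.

import pandas as pd

src = pd.DataFrame({
    "HydroID": [101, 101, 101, 102, 102],
    "Stage": [0.5, 1.0, 1.5, 0.5, 1.0],
    "TopWidthDiff": [4.2, 1.1, 3.0, 0.7, 2.5],
})

# Index of the minimum difference within each HydroID, then pull those rows
bankfull_rows = src.loc[src.groupby("HydroID")["TopWidthDiff"].idxmin()].reset_index(drop=True)
print(bankfull_rows)
# HydroID 101 keeps Stage 1.0 (diff 1.1); HydroID 102 keeps Stage 0.5 (diff 0.7)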
+ output_bathy['XS Bankfull Area Ratio'].mask(output_bathy['XS Bankfull Area Ratio']>bankful_xs_ratio_flag,inplace=True) + ## Print XS Area Diff statistics + log_text += ('Average: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].mean())) + '\n' + log_text += ('Minimum: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].min())) + '\n' + log_text += ('Maximum: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].max())) + '\n' + log_text += ('STD: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].std())) + '\n' + + ## Bin XS Bankfull Area Ratio by stream order + stream_order_bathy_ratio = output_bathy[['order_','Stage','XS Bankfull Area Ratio']].copy() + ## mask stage values when XS Bankfull Area Ratio is null (need to filter to calculate the median for valid values below) + stream_order_bathy_ratio['Stage'].mask(stream_order_bathy_ratio['XS Bankfull Area Ratio'].isnull(),inplace=True) + stream_order_bathy_ratio = stream_order_bathy_ratio.groupby('order_').agg(count=('XS Bankfull Area Ratio','count'),mean_xs_area_ratio=('XS Bankfull Area Ratio','mean'),median_stage_bankfull=('Stage','median')) + ## fill XS Bankfull Area Ratio and Stage values if no values were found in the grouby calcs + stream_order_bathy_ratio = (stream_order_bathy_ratio.ffill()+stream_order_bathy_ratio.bfill())/2 + ## fill first and last stream order values if needed + stream_order_bathy_ratio = stream_order_bathy_ratio.bfill().ffill() + ## Get count_total tally of the total number of stream order hydroids in the HUC (not filtering anything out) + stream_order_bathy_ratio_count = output_bathy.groupby('order_').agg(count_total=('Stage','count')) + stream_order_bathy_ratio = stream_order_bathy_ratio.merge(stream_order_bathy_ratio_count,how='left',on='order_') + ## Fill any remaining null values: mean_xs_area_ratio --> 1 median_stage_bankfull --> 0 + stream_order_bathy_ratio['mean_xs_area_ratio'].mask(stream_order_bathy_ratio['mean_xs_area_ratio'].isnull(),1,inplace=True) + stream_order_bathy_ratio['median_stage_bankfull'].mask(stream_order_bathy_ratio['median_stage_bankfull'].isnull(),0,inplace=True) + + ## Combine SRC df and df of XS Area for each hydroid and matching stage and order from bins above + output_bathy = output_bathy.merge(stream_order_bathy_ratio,how='left',on='order_') + modified_src_base = modified_src_base.merge(stream_order_bathy_ratio,how='left',on='order_') + + ## Calculate stage vs median_stage_bankfull difference for bankfull lookup + modified_src_base['lookup_stage_diff'] = (modified_src_base[['median_stage_bankfull','Thalweg_burn_elev']].max(axis=1) - modified_src_base['Stage']).abs() + + ## If median_stage_bankfull is null then set lookup_stage_diff to 999 at stage 0 (handles errors for channels outside CONUS) + modified_src_base['lookup_stage_diff'].mask((modified_src_base['Stage'] == 0) & (modified_src_base['median_stage_bankfull'].isnull()),999,inplace=True) + + ## Groupby HydroID again and find min of lookup_stage_diff + xs_area_hydroid_lookup = modified_src_base[['HydroID','BANKFULL_XSEC_AREA (m2)','XS Area (m2)','Stage','Thalweg_burn_elev','median_stage_bankfull','lookup_stage_diff','mean_xs_area_ratio']] + xs_area_hydroid_lookup = xs_area_hydroid_lookup.loc[xs_area_hydroid_lookup.groupby('HydroID')['lookup_stage_diff'].idxmin()].reset_index(drop=True) + + ## Calculate bathy adjusted XS Area ('XS Area (m2)' mutliplied by mean_xs_area_ratio) + 
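Editor's sketch: the stream-order gap filling above averages forward- and back-filled values, then back/forward fills the end rows. A tiny worked example of what that does to a column with interior and edge gaps (values are made up).

import pandas as pd
import numpy as np

ratios = pd.Series([np.nan, 1.8, np.nan, 2.6, np.nan],
                   index=pd.Index([1, 2, 3, 4, 5], name="order_"))

# Interior gaps become the mean of their neighbors; edge gaps are still NaN here
filled = (ratios.ffill() + ratios.bfill()) / 2
# Then edge gaps take the nearest available value
filled = filled.bfill().ffill()
print(filled.round(2).tolist())  # [1.8, 1.8, 2.2, 2.6, 2.6]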
xs_area_hydroid_lookup['bathy_calc_xs_area'] = (xs_area_hydroid_lookup['XS Area (m2)'] * xs_area_hydroid_lookup['mean_xs_area_ratio']) - xs_area_hydroid_lookup['XS Area (m2)'] + + ## Calculate the ratio btw the lookup SRC XS_Area and the Bankfull_XSEC_AREA --> use this as a flag for potentially bad XS data + xs_area_hydroid_lookup['bankfull_XS_ratio_flag'] = (xs_area_hydroid_lookup['bathy_calc_xs_area'] / xs_area_hydroid_lookup['BANKFULL_XSEC_AREA (m2)']) + ## Set bath_cal_xs_area to 0 if the bankfull_XS_ratio_flag is > threshold --> 5x (assuming too large of difference to be a reliable bankfull calculation) + xs_area_hydroid_lookup['bathy_calc_xs_area'].mask(xs_area_hydroid_lookup['bankfull_XS_ratio_flag']>bathy_xsarea_flag,xs_area_hydroid_lookup['BANKFULL_XSEC_AREA (m2)'],inplace=True) + xs_area_hydroid_lookup['barc_on'] = np.where(xs_area_hydroid_lookup['bathy_calc_xs_area'].isnull(), False, True) # field to identify where vmann is on/off + xs_area_hydroid_lookup['bathy_calc_xs_area'].mask(xs_area_hydroid_lookup['bankfull_XS_ratio_flag'].isnull(),0,inplace=True) + + ## Merge bathy_calc_xs_area to the modified_src_base + modified_src_base = modified_src_base.merge(xs_area_hydroid_lookup.loc[:,['HydroID','bathy_calc_xs_area','barc_on']],how='left',on='HydroID') + + ## Mask/null the bathy calculated area for streamorders that the user wants to ignore (set bathy_cals_xs_area = 0 for streamorder = 10) + modified_src_base['bathy_calc_xs_area'].mask(modified_src_base['order_'] >= ignore_streamorder,0.0,inplace=True) + + ## Calculate new bathy adjusted channel geometry variables + modified_src_base = modified_src_base.rename(columns={'Discharge (m3s-1)':'orig_Discharge (m3s-1)','XS Area (m2)':'orig_XS Area (m2)','Volume (m3)':'orig_Volume (m3)','WetArea (m2)':'orig_WetArea (m2)','HydraulicRadius (m)':'orig_HydraulicRadius (m)'}) + modified_src_base['XS Area (m2)'] = modified_src_base['orig_XS Area (m2)'] + modified_src_base['bathy_calc_xs_area'] + modified_src_base['Volume (m3)'] = modified_src_base['XS Area (m2)'] * modified_src_base['LENGTHKM'] * 1000 + modified_src_base['WetArea (m2)'] = modified_src_base['Volume (m3)']/modified_src_base['LENGTHKM']/1000 + modified_src_base['HydraulicRadius (m)'] = modified_src_base['WetArea (m2)']/modified_src_base['WettedPerimeter (m)'] + modified_src_base['HydraulicRadius (m)'].fillna(0, inplace=True) + ## mask out negative top width differences (avoid thalweg burn notch) + modified_src_base['HydraulicRadius (m)'].mask((modified_src_base['HydraulicRadius (m)']>thal_hyd_radius_flag) & (modified_src_base['Stage'] do we need SRC to start at 0?? 
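Editor's sketch: the recomputed wetted area and hydraulic radius feed the Manning's-equation discharge referenced above. As a reminder of the formula, a numpy sketch with hypothetical roughness and slope values; the repository's actual column names and constants may differ.

import numpy as np

def mannings_discharge(wet_area_m2, hydraulic_radius_m, slope, mannings_n):
    # Q = (1/n) * A * R^(2/3) * S^(1/2), SI units (m^3/s)
    return (1.0 / mannings_n) * wet_area_m2 * hydraulic_radius_m ** (2.0 / 3.0) * np.sqrt(slope)

# Hypothetical channel: 12 m2 wetted area, 0.8 m hydraulic radius, 0.1% slope, n = 0.06
print(mannings_discharge(12.0, 0.8, 0.001, 0.06))  # ~5.45 m^3/s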
+ modified_src_base['Discharge (m3s-1)'].mask(modified_src_base['Stage'] == 0,0,inplace=True) + modified_src_base['Discharge (m3s-1)'].mask(modified_src_base['Stage'] == modified_src_base['Thalweg_burn_elev'],0,inplace=True) + modified_src_base['Discharge (m3s-1)'].mask(modified_src_base['Stage'] < modified_src_base['Thalweg_burn_elev'],-999,inplace=True) + + ## Organize bathy calc output variables for csv + output_bathy = output_bathy[['HydroID','order_','Stage','SurfaceArea (m2)','TopWidth (m)','BANKFULL_WIDTH (m)','Top Width Diff (m)','XS Area (m2)','BANKFULL_XSEC_AREA (m2)','XS Area Diff (m2)','XS Bankfull Area Ratio','count','median_stage_bankfull','mean_xs_area_ratio']] + + ## Export bathy/bankful calculation tables for easy viewing + output_bathy.to_csv(output_bathy_fileName,index=False) + stream_order_bathy_ratio.to_csv(output_bathy_streamorder_fileName,index=True) + find_thalweg_notch.to_csv(output_bathy_thalweg_fileName,index=True) + xs_area_hydroid_lookup.to_csv(output_bathy_xs_lookup_fileName,index=True) + + ## Output new src_full_crosswalked + modified_src_base.to_csv(out_src_filename,index=False) + ## Update the hydroTable + modified_hydro_table = modified_src_base.loc[:,['HydroID','Stage','barc_on','Volume (m3)','WetArea (m2)','HydraulicRadius (m)','Discharge (m3s-1)']] + modified_hydro_table.rename(columns={'Stage' : 'stage','Discharge (m3s-1)':'discharge_cms'},inplace=True) + df_htable = pd.read_csv(input_htable_fileName,dtype={'HUC': str}) + df_htable.drop(['barc_on'], axis=1, inplace=True) # drop the default "barc_on" variable from add_crosswalk.py + if not set(['orig_discharge_cms','orig_Volume (m3)','orig_WetArea (m2)','orig_HydraulicRadius (m)']).issubset(df_htable.columns): # check if "orig_" attributes do NOT already exist (likely generated from previous BARC run) + df_htable.rename(columns={'discharge_cms':'orig_discharge_cms','Volume (m3)':'orig_Volume (m3)','WetArea (m2)':'orig_WetArea (m2)','HydraulicRadius (m)':'orig_HydraulicRadius (m)'},inplace=True) + else: + df_htable.drop(['discharge_cms','Volume (m3)','WetArea (m2)','HydraulicRadius (m)'], axis=1, inplace=True) # drop the previously modified columns - to be replaced with updated version + df_htable = df_htable.merge(modified_hydro_table, how='left', left_on=['HydroID','stage'], right_on=['HydroID','stage']) + df_htable.to_csv(input_htable_fileName,index=False) + log_text += ('Output new hydroTable and src_full_crosswalked: ') + '\n' + log_text += ('Completed Bathy Calculations: ') + str(huc) +'\n#################\n' + + ## plot rating curves (optional arg) + if src_plot_option == 'True': + if isdir(huc_plot_output_dir) == False: + os.mkdir(huc_plot_output_dir) + generate_src_plot(df_htable, huc_plot_output_dir) + + return(log_text) + + +def generate_src_plot(df_src, plt_out_dir): + + ## create list of unique hydroids + hydroids = df_src.HydroID.unique().tolist() + + ## plot each hydroid SRC in the huc + for hydroid in hydroids: + print("Creating SRC plot: " + str(hydroid)) + plot_df = df_src.loc[df_src['HydroID'] == hydroid] + + f, ax = plt.subplots(figsize=(6.5, 6.5)) + ax.set_title(str(hydroid)) + sns.despine(f, left=True, bottom=True) + sns.scatterplot(x='orig_discharge_cms', y='stage', data=plot_df, label="Orig SRC", ax=ax, color='blue') + sns.scatterplot(x='discharge_cms', y='stage', data=plot_df, label="SRC w/ BARC", ax=ax, color='orange') + #sns.lineplot(x='discharge_1_5', y='Stage_1_5', data=plot_df, color='green', ax=ax) + #plt.fill_between(plot_df['discharge_1_5'], 
plot_df['Stage_1_5'],alpha=0.5) + #plt.text(plot_df['discharge_1_5'].median(), plot_df['Stage_1_5'].median(), "NWM 1.5yr: " + str(plot_df['Stage_1_5'].median())) + ax.legend() + plt.savefig(plt_out_dir + os.sep + str(hydroid) + '_barc.png',dpi=175, bbox_inches='tight') + plt.close() + +def multi_process(bathy_rc_lookup, procs_list): + print(f"Applying bathy adjustment calcs for {len(procs_list)} hucs using {number_of_jobs} jobs") + with Pool(processes=number_of_jobs) as pool: + map_output = pool.map(bathy_rc_lookup, procs_list) + #log_file.write(str(map_output)) + log_file.writelines(["%s\n" % item for item in map_output]) + +if __name__ == '__main__': + #output_src,input_bathy_fileName,output_bathy_fileName,output_bathy_streamorder_fileName,output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName + parser = argparse.ArgumentParser(description="Estimate the unaccounted for channel bathymetry using a regression-based estimate of channel XSec Area") + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) + parser.add_argument('-bfull_geom','--bankfull-xsec-input',help='Regression dataset w/ bankfull geometry by featureid (topwidth & xsec area)',required=True,type=str) + parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) + parser.add_argument('-plots','--src-plot-option',help='Optional (True or False): use this flag to create src plots for all hydroids. WARNING - long runtime',required=False,default='False',type=str) + + args = vars(parser.parse_args()) + + fim_dir = args['fim_dir'] + bankfull_regres_filepath = args['bankfull_xsec_input'] + number_of_jobs = args['number_of_jobs'] + src_plot_option = args['src_plot_option'] + procs_list = [] + + print('STARTING Bathy Adjusted Rating Curve routine...') + ## Check that the input bankfull geom filepath exists and then read it to dataframe + if not isfile(bankfull_regres_filepath): + print('!!! Can not find the input bankfull geometry regression file: ' + str(bankfull_regres_filepath)) + else: + ## Read the Manning's n csv (ensure that it contains feature_id, channel mannings, floodplain mannings) + print('Importing the bankfull regression data file: ' + bankfull_regres_filepath) + df_bfull_geom = pd.read_csv(bankfull_regres_filepath,dtype= {'COMID': int}) + if 'COMID' not in df_bfull_geom.columns and 'feature_id' not in df_bfull_geom.columns: + print('Missing required data column ("feature_id" or "COMID")!!! 
--> ' + df_bfull_geom) + else: + print('Running Bathy Adjust Rating Curve (BARC)...') + + ## Print message to user and initiate run clock + print('Writing progress to log file here: ' + str(join(fim_dir,'log_BARC.log'))) + print('This may take a few minutes...') + ## Create a time var to log run time + begin_time = dt.datetime.now() + + ## Loop through hucs in the fim_dir and create list of variables to feed to multiprocessing + huc_list = os.listdir(fim_dir) + huc_pass_list = [] + for huc in huc_list: + if huc != 'logs' and huc[-3:] != 'log' and huc[-4:] != '.csv': + #output_src,input_bathy_fileName,output_bathy_fileName,output_bathy_streamorder_fileName,output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName + in_src_filename = join(fim_dir,huc,'src_full_crosswalked.csv') + out_src_filename = join(fim_dir,huc,'src_full_crosswalked_BARC.csv') + htable_filename = join(fim_dir,huc,'hydroTable.csv') + output_bath_filename = join(fim_dir,huc,'bathy_crosswalk_calcs.csv') + output_bathy_thalweg_fileName = join(fim_dir,huc,'bathy_thalweg_flag.csv') + output_bathy_streamorder_fileName = join(fim_dir,huc,'bathy_stream_order_calcs.csv') + output_bathy_thalweg_fileName = join(fim_dir,huc,'bathy_thalweg_flag.csv') + output_bathy_xs_lookup_fileName = join(fim_dir,huc,'bathy_xs_area_hydroid_lookup.csv') + huc_plot_output_dir = join(fim_dir,huc,'src_plots') + + if isfile(in_src_filename): + print(str(huc)) + huc_pass_list.append(str(huc)) + procs_list.append([in_src_filename,df_bfull_geom,output_bath_filename,output_bathy_streamorder_fileName,output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName,htable_filename,out_src_filename,huc,src_plot_option,huc_plot_output_dir]) + else: + print(str(huc) + ' --> can not find the src_full_crosswalked.csv in the fim output dir: ' + str(join(fim_dir,huc))) + + ## initiate log file + print(f"Applying bathy adjustment calcs for {len(procs_list)} hucs using {number_of_jobs} jobs...") + sys.__stdout__ = sys.stdout + log_file = open(join(fim_dir,'logs','log_barc.log'),"w") + sys.stdout = log_file + log_file.write('START TIME: ' + str(begin_time) + '\n') + log_file.writelines(["%s\n" % item for item in huc_pass_list]) + + ## Write env variables to log files + log_file.write('sa_ratio_flag = ' + str(sa_ratio_flag) + ' --> Flag: Surface area ratio value to identify possible thalweg notch "jump" (SA x+1 / SA x)' + '\n') + log_file.write('thal_stg_limit = ' + str(thal_stg_limit) + ' --> Threshold: Stage value limit below which to look for the surface area ratio flag (only flag thalweg notch below this threshold)' + '\n') + log_file.write('bankful_xs_ratio_flag = ' + str(bankful_xs_ratio_flag) + ' --> Flag: Identify bogus BARC adjusted values where the regression bankfull XS Area/SRC bankfull area is > threshold (topwidth crosswalk issues or bad bankfull regression data points??)' + '\n') + log_file.write('bathy_xsarea_flag = ' + str(bathy_xsarea_flag) + ' --> Flag: Cross section area limit to cap the amount of bathy XS area added to the SRC. Limits the bathy_calc_xs_area/ BANKFULL_XSEC_AREA to the specified threshold' + '\n') + log_file.write('thal_hyd_radius_flag = ' + str(thal_hyd_radius_flag) + ' --> Flag: Idenitify possible erroneous BARC-adjusted hydraulic radius values. 
BARC discharge values greater than the specified threshold and within the thal_stg_limit are set to 0' + '\n') + log_file.write('ignore_streamorder = ' + str(ignore_streamorder) + ' --> Do not perform BARC for streamorders >= provided value' + '\n') + log_file.write('#########################################################\n\n') + + ## Pass huc procs_list to multiprocessing function + multi_process(bathy_rc_lookup, procs_list) + + ## Record run time and close log file + end_time = dt.datetime.now() + log_file.write('END TIME: ' + str(end_time) + '\n') + tot_run_time = end_time - begin_time + log_file.write('TOTAL RUN TIME: ' + str(tot_run_time)) + sys.stdout = sys.__stdout__ + log_file.close() diff --git a/lib/buildstreamtraversal.py b/src/build_stream_traversal.py similarity index 56% rename from lib/buildstreamtraversal.py rename to src/build_stream_traversal.py index 1fcb00d65..b3639ac95 100644 --- a/lib/buildstreamtraversal.py +++ b/src/build_stream_traversal.py @@ -2,16 +2,15 @@ Description: This tool creates unique IDs for each segment and builds the To_Node, From_Node, and NextDownID columns to traverse the network Required Arguments: - modelstream = stream network - WBD8 = HUC8 boundary dataset - HYDROID = name of ID column (string) + streams = stream network + wbd8 = HUC8 boundary dataset + hydro_id = name of ID column (string) ''' import sys -import datetime -import pandas as pd import argparse import geopandas as gpd + def trace(): import traceback, inspect tb = sys.exc_info()[2] @@ -22,71 +21,80 @@ def trace(): synerror = traceback.format_exc().splitlines()[-1] return line, filename, synerror -FN_FROMNODE = "From_Node" -FN_TONODE = "To_Node" -FN_NEXTDOWNID = "NextDownID" +from_node = "From_Node" +to_node = "To_Node" +next_down_id = "NextDownID" + -class BuildStreamTraversalColumns(object): +class build_stream_traversal_columns(object): '''Tool class for updating the next down IDs of stream features.''' def __init__(self): '''Define tool properties (tool name is the class name).''' self.label = 'Find Next Downstream Line' self.description = '''Finds next downstream line, retrieves its HydroID and stores it in the NextDownID field.''' - def execute(self, modelstream, WBD8, HYDROID): + def execute(self, streams, wbd8, hydro_id): try: split_code = 1 sOK = 'OK' # check for HydroID; Assign if it doesn't exist - if not HYDROID in modelstream.columns: - print ("Required field " + HYDROID + " does not exist in input. Generating..") - - stream_centroid = gpd.GeoDataFrame({'geometry':modelstream.geometry.centroid}, crs=modelstream.crs, geometry='geometry') - stream_wbdjoin = gpd.sjoin(stream_centroid, WBD8, how='left', op='within') - stream_wbdjoin = stream_wbdjoin.rename(columns={"geometry": "centroid", "index_right": "HUC8id"}) - modelstream = modelstream.join(stream_wbdjoin).drop(columns=['centroid']) - - modelstream['seqID'] = (modelstream.groupby('HUC8id').cumcount(ascending=True)+1).astype('str').str.zfill(4) - modelstream = modelstream.loc[modelstream['HUC8id'].notna(),:] - modelstream = modelstream.assign(HYDROID= lambda x: x.HUC8id + x.seqID) - modelstream = modelstream.rename(columns={"HYDROID": HYDROID}).sort_values(HYDROID) - modelstream = modelstream.drop(columns=['HUC8id', 'seqID']) - modelstream[HYDROID] = modelstream[HYDROID].astype(int) - print ('Generated ' + HYDROID) + if not hydro_id in streams.columns: + print ("Required field " + hydro_id + " does not exist in input. 
Generating..") + + # Get stream midpoint + stream_midpoint = [] + for i,lineString in enumerate(streams.geometry): + stream_midpoint = stream_midpoint + [lineString.interpolate(0.5,normalized=True)] + + stream_md_gpd = gpd.GeoDataFrame({'geometry':stream_midpoint}, crs=streams.crs, geometry='geometry') + stream_wbdjoin = gpd.sjoin(stream_md_gpd, wbd8, how='left', op='within') + stream_wbdjoin = stream_wbdjoin.rename(columns={"geometry": "midpoint", "index_right": "HUC8id"}) + streams = streams.join(stream_wbdjoin).drop(columns=['midpoint']) + + streams['seqID'] = (streams.groupby('HUC8id').cumcount(ascending=True)+1).astype('str').str.zfill(4) + streams = streams.loc[streams['HUC8id'].notna(),:] + if streams.HUC8id.dtype != 'str': streams.HUC8id = streams.HUC8id.astype(str) + if streams.seqID.dtype != 'str': streams.seqID = streams.seqID.astype(str) + + streams = streams.assign(hydro_id= lambda x: x.HUC8id + x.seqID) + streams = streams.rename(columns={"hydro_id": hydro_id}).sort_values(hydro_id) + streams = streams.drop(columns=['HUC8id', 'seqID']) + streams[hydro_id] = streams[hydro_id].astype(int) + print ('Generated ' + hydro_id) # Check for TO/From Nodes; Assign if doesnt exist bOK = True - if not FN_FROMNODE in modelstream.columns: - print ("Field " + FN_FROMNODE + " does not exist in input ") + if not from_node in streams.columns: + print ("Field " + from_node + " does not exist in input ") bOK = False - if not FN_TONODE in modelstream.columns: - print ("Field " + FN_TONODE + " does not exist in input. Generating..") + if not to_node in streams.columns: + print ("Field " + to_node + " does not exist in input. Generating..") bOK = False if(bOK==False): # Add fields if not they do not exist. - if not FN_FROMNODE in modelstream.columns: - modelstream[FN_FROMNODE] = '' + if not from_node in streams.columns: + streams[from_node] = '' - if not FN_TONODE in modelstream.columns: - modelstream[FN_TONODE] = '' + if not to_node in streams.columns: + streams[to_node] = '' - modelstream = modelstream.sort_values(by=[HYDROID], ascending=True).copy() + streams = streams.sort_values(by=[hydro_id], ascending=True).copy() xy_dict = {} bhasnullshape=False - for rows in modelstream[['geometry', FN_FROMNODE, FN_TONODE]].iterrows(): + for rows in streams[['geometry', from_node, to_node]].iterrows(): if rows[1][0]: # From Node firstx = round(rows[1][0].coords.xy[0][0], 7) firsty = round(rows[1][0].coords.xy[1][0], 7) from_key = '{},{}'.format(firstx, firsty) if from_key in xy_dict: - modelstream.at[rows[0], FN_FROMNODE,] = xy_dict[from_key] + streams.at[rows[0], from_node,] = xy_dict[from_key] else: xy_dict[from_key] = len(xy_dict) + 1 - modelstream.at[rows[0], FN_FROMNODE,] = xy_dict[from_key] + streams.at[rows[0], from_node,] = xy_dict[from_key] # To Node lastx = round(rows[1][0].coords.xy[0][-1], 7) @@ -94,27 +102,27 @@ def execute(self, modelstream, WBD8, HYDROID): to_key = '{},{}'.format(lastx, lasty) #if xy_dict.has_key(to_key): if to_key in xy_dict: - modelstream.at[rows[0], FN_TONODE] = xy_dict[to_key] + streams.at[rows[0], to_node] = xy_dict[to_key] else: xy_dict[to_key] = len(xy_dict) + 1 - modelstream.at[rows[0], FN_TONODE] = xy_dict[to_key] + streams.at[rows[0], to_node] = xy_dict[to_key] else: bhasnullshape=True if bhasnullshape==True: print ("Some of the input features have a null shape.") - print (FN_FROMNODE + " and " + FN_TONODE + " fields cannot be populated for those features.") + print (from_node + " and " + to_node + " fields cannot be populated for those features.") else: print 
('Generated To/From Nodes') # Create NextDownID field - if not FN_NEXTDOWNID in modelstream.columns: - modelstream[FN_NEXTDOWNID] = '' + if not next_down_id in streams.columns: + streams[next_down_id] = '' - # Create dict to store FN_FROMNODE values for each HydroID + # Create dict to store from_node values for each HydroID dnodes=dict() lstHydroIDs=[] - for row in modelstream[[FN_FROMNODE,HYDROID]].iterrows(): + for row in streams[[from_node,hydro_id]].iterrows(): if (row[1][0] in dnodes)==False: lstHydroIDs=[row[1][1]] @@ -124,7 +132,7 @@ def execute(self, modelstream, WBD8, HYDROID): lstHydroIDs.append(row[1][1]) # for each stream segment, search dict for HydroID downstream and - for urow in modelstream[[FN_NEXTDOWNID, FN_TONODE, FN_FROMNODE, HYDROID]].iterrows(): + for urow in streams[[next_down_id, to_node, from_node, hydro_id]].iterrows(): tonodecol = urow[1][1] nextdownIDcol = urow[1][0] hydroIDcol = urow[1][3] @@ -152,32 +160,30 @@ def execute(self, modelstream, WBD8, HYDROID): if next_down_ids:del next_down_ids except: pass - modelstream.loc[modelstream[HYDROID]== hydroIDcol,[FN_NEXTDOWNID]] = nextdownIDcol + streams.loc[streams[hydro_id]== hydroIDcol,[next_down_id]] = nextdownIDcol - tReturns = (sOK, modelstream) + tReturns = (sOK, streams) except Exception: sOK = "{}".format(trace()) tReturns = (sOK, ) return tReturns -if(__name__=='__main__'): + +if __name__ == '__main__': try: ap = argparse.ArgumentParser() ap.add_argument("-p", "--parameters", nargs='+', default=[], required=True, help="list of parameters") args = ap.parse_args() - modelstream = args.parameters[0] - WBD8 = args.parameters[1] - HYDROID = args.parameters[2] + streams = args.parameters[0] + wbd8 = args.parameters[1] + hydro_id = args.parameters[2] - oProcessor = BuildStreamTraversalColumns() - params = (modelstream, WBD8, HYDROID) + oProcessor = build_stream_traversal_columns() + params = (streams, wbd8, hydro_id) tResults=None tResults = oProcessor.execute(params) del oProcessor except: print (str(trace())) - finally: - dt = datetime.datetime.now() - print ('Finished at ' + dt.strftime("%Y-%m-%d %H:%M:%S")) diff --git a/src/burn_in_levees.py b/src/burn_in_levees.py new file mode 100755 index 000000000..93acbc1e4 --- /dev/null +++ b/src/burn_in_levees.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 + +import rasterio +import numpy as np +import argparse +from utils.shared_functions import mem_profile + + +@mem_profile +def burn_in_levees(dem_filename,nld_filename,out_dem_filename): + + #TODO Document this code + dem = rasterio.open(dem_filename) + nld = rasterio.open(nld_filename) + + dem_data = dem.read(1) + nld_data = nld.read(1) + + no_data = nld.nodata + + nld_m = np.where(nld_data == int(no_data), -9999.0, (nld_data*0.3048).astype(rasterio.float32)) + + dem_profile = dem.profile.copy() + + dem_nld_burn = np.maximum(dem_data, nld_m) + + + with rasterio.open(out_dem_filename, "w", **dem_profile, BIGTIFF='YES') as dest: + dest.write(dem_nld_burn, indexes = 1) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Burn in NLD elevations') + parser.add_argument('-dem','--dem-filename', help='DEM filename', required=True,type=str) + parser.add_argument('-nld','--nld-filename', help='NLD filename', required=True,type=str) + parser.add_argument('-out','--out-dem-filename', help='out DEM filename', required=True,type=str) + + args = vars(parser.parse_args()) + + dem_filename = args['dem_filename'] + nld_filename = args['nld_filename'] + out_dem_filename = args['out_dem_filename'] + + 
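Editor's sketch: burn_in_levees raises the DEM to the levee crest wherever the NLD grid has data, after converting feet to meters. The core operation is an elementwise maximum; a minimal numpy illustration with toy arrays (all values hypothetical).

import numpy as np

dem_m = np.array([[10.0, 10.2],
                  [10.4, 10.6]], dtype=np.float32)

# NLD crest elevations in feet; -9999 marks cells with no levee
nld_ft = np.array([[-9999.0, 36.0],
                   [-9999.0, -9999.0]], dtype=np.float32)

# Convert feet to meters, keep nodata very low so it never wins the maximum
nld_m = np.where(nld_ft == -9999.0, -9999.0, nld_ft * 0.3048)

burned = np.maximum(dem_m, nld_m)
print(burned)  # only the cell with a levee is raised: 36 ft -> 10.9728 m overrides 10.2 m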
burn_in_levees(dem_filename,nld_filename,out_dem_filename) diff --git a/lib/check_huc_inputs.py b/src/check_huc_inputs.py similarity index 93% rename from lib/check_huc_inputs.py rename to src/check_huc_inputs.py index b1b83e7c0..cd471cd88 100755 --- a/lib/check_huc_inputs.py +++ b/src/check_huc_inputs.py @@ -4,8 +4,9 @@ import argparse from glob import glob + def __read_included_files(parent_dir_path): - + filename_patterns = glob(os.path.join(parent_dir_path,'included_huc*.lst')) accepted_hucs_set = set() @@ -23,7 +24,7 @@ def __read_included_files(parent_dir_path): def __read_input_hucs(hucs): - + hucs = [h.split() for h in hucs][0] if os.path.isfile(hucs[0]): with open(hucs[0],'r') as hucs_file: @@ -42,10 +43,11 @@ def __check_for_membership(hucs,accepted_hucs_set): def check_hucs(hucs): - accepted_hucs = __read_included_files(os.environ['inputDataDir']) + accepted_hucs = __read_included_files(os.path.join(os.environ['inputDataDir'],'huc_lists')) hucs = __read_input_hucs(hucs) __check_for_membership(hucs,accepted_hucs) + if __name__ == '__main__': # parse arguments @@ -54,6 +56,6 @@ def check_hucs(hucs): # extract to dictionary args = vars(parser.parse_args()) - + # call function check_hucs(**args) diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py new file mode 100755 index 000000000..e5053627a --- /dev/null +++ b/src/clip_vectors_to_wbd.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 + +import sys +import geopandas as gpd +import argparse +from shapely.geometry import MultiPolygon,Polygon +from utils.shared_functions import getDriver, mem_profile + + +@mem_profile +def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance): + + hucUnitLength = len(str(hucCode)) + + # Get wbd buffer + wbd = gpd.read_file(wbd_filename) + wbd_buffer = wbd.copy() + wbd_buffer.geometry = wbd.geometry.buffer(wbd_buffer_distance,resolution=32) + projection = wbd_buffer.crs + + great_lakes = gpd.read_file(great_lakes_filename, mask = wbd_buffer).reset_index(drop=True) + + if not great_lakes.empty: + print("Masking Great Lakes for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + + # Clip excess lake area + great_lakes = gpd.clip(great_lakes, wbd_buffer) + + # Buffer remaining lake area + great_lakes.geometry = great_lakes.buffer(lake_buffer_distance) + + # Removed buffered GL from WBD buffer + wbd_buffer = gpd.overlay(wbd_buffer, great_lakes, how='difference') + wbd_buffer = wbd_buffer[['geometry']] + wbd_buffer.to_file(wbd_buffer_filename,driver=getDriver(wbd_buffer_filename),index=False) + + else: + wbd_buffer = wbd_buffer[['geometry']] + wbd_buffer.to_file(wbd_buffer_filename,driver=getDriver(wbd_buffer_filename),index=False) + + del great_lakes + + # Clip ocean water polygon for future masking ocean areas (where applicable) + landsea = gpd.read_file(landsea_filename, mask = wbd_buffer) + if not landsea.empty: + landsea.to_file(subset_landsea_filename,driver=getDriver(subset_landsea_filename),index=False) + del landsea + + # Find intersecting lakes and writeout + print("Subsetting NWM Lakes for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nwm_lakes = 
gpd.read_file(nwm_lakes_filename, mask = wbd_buffer) + nwm_lakes = nwm_lakes.loc[nwm_lakes.Shape_Area < 18990454000.0] + + if not nwm_lakes.empty: + # Perform fill process to remove holes/islands in the NWM lake polygons + nwm_lakes = nwm_lakes.explode() + nwm_lakes_fill_holes=MultiPolygon(Polygon(p.exterior) for p in nwm_lakes['geometry']) # remove donut hole geometries + # Loop through the filled polygons and insert the new geometry + for i in range(len(nwm_lakes_fill_holes)): + nwm_lakes.loc[i,'geometry'] = nwm_lakes_fill_holes[i] + nwm_lakes.to_file(subset_nwm_lakes_filename,driver=getDriver(subset_nwm_lakes_filename),index=False) + del nwm_lakes + + # Find intersecting levee lines + print("Subsetting NLD levee lines for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nld_lines = gpd.read_file(nld_lines_filename, mask = wbd_buffer) + if not nld_lines.empty: + nld_lines.to_file(subset_nld_lines_filename,driver=getDriver(subset_nld_lines_filename),index=False) + del nld_lines + + # Subset nhd headwaters + print("Subsetting NHD Headwater Points for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nhd_headwaters = gpd.read_file(nhd_headwaters_filename, mask = wbd_buffer) + if extent == 'MS': + nhd_headwaters = nhd_headwaters.loc[nhd_headwaters.mainstem==1] + + if len(nhd_headwaters) > 0: + nhd_headwaters.to_file(subset_nhd_headwaters_filename,driver=getDriver(subset_nhd_headwaters_filename),index=False) + else: + print ("No headwater point(s) within HUC " + str(hucCode) + " boundaries.") + sys.exit(0) + del nhd_headwaters + + # Subset nhd streams + print("Querying NHD Streams for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nhd_streams = gpd.read_file(nhd_streams_filename, mask = wbd_buffer) + + if extent == 'MS': + nhd_streams = nhd_streams.loc[nhd_streams.mainstem==1] + + if len(nhd_streams) > 0: + + # Find incoming stream segments (to WBD buffer) and identify which are upstream + threshold_segments = gpd.overlay(nhd_streams, wbd_buffer, how='symmetric_difference') + from_list = threshold_segments.FromNode.to_list() + to_list = nhd_streams.ToNode.to_list() + missing_segments = list(set(from_list) - set(to_list)) + + # special case: stream meanders in and out of WBD buffer boundary + if str(hucCode) == '10030203': + missing_segments = missing_segments + [23001300001840.0, 23001300016571.0] + + if str(hucCode) == '08030100': + missing_segments = missing_segments + [20000600011559.0, 20000600045761.0, 20000600002821.0] + + # Remove incoming stream segment so it won't be routed as outflow during hydroconditioning + nhd_streams = nhd_streams.loc[~nhd_streams.FromNode.isin(missing_segments)] + + nhd_streams.to_file(subset_nhd_streams_filename,driver=getDriver(subset_nhd_streams_filename),index=False) + else: + print ("No NHD streams within HUC " + str(hucCode) + " boundaries.") + sys.exit(0) + del nhd_streams + + # Find intersecting nwm_catchments + print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nwm_catchments = gpd.read_file(nwm_catchments_filename, mask = wbd_buffer) + if extent == 'MS': + nwm_catchments = nwm_catchments.loc[nwm_catchments.mainstem==1] + + if len(nwm_catchments) > 0: + nwm_catchments.to_file(subset_nwm_catchments_filename,driver=getDriver(subset_nwm_catchments_filename),index=False) + else: + print ("No NHD catchments within HUC " + str(hucCode) + " boundaries.") + sys.exit(0) + del nwm_catchments + + # Subset nwm streams + print("Subsetting NWM Streams and deriving headwaters for HUC{} 
{}".format(hucUnitLength,hucCode),flush=True) + nwm_streams = gpd.read_file(nwm_streams_filename, mask = wbd_buffer) + if extent == 'MS': + nwm_streams = nwm_streams.loc[nwm_streams.mainstem==1] + if len(nwm_streams) > 0: + nwm_streams.to_file(subset_nwm_streams_filename,driver=getDriver(subset_nwm_streams_filename),index=False) + else: + print ("No NWM stream segments within HUC " + str(hucCode) + " boundaries.") + sys.exit(0) + del nwm_streams + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Subset vector layers') + parser.add_argument('-d','--hucCode', help='HUC boundary ID', required=True,type=str) + parser.add_argument('-w','--nwm-streams', help='NWM flowlines', required=True) + parser.add_argument('-s','--nhd-streams',help='NHDPlus HR burnline',required=True) + parser.add_argument('-l','--nwm-lakes', help='NWM Lakes', required=True) + parser.add_argument('-r','--nld-lines', help='Levee vectors to use within project path', required=True) + parser.add_argument('-g','--wbd',help='HUC boundary',required=True) + parser.add_argument('-f','--wbd-buffer',help='Buffered HUC boundary',required=True) + parser.add_argument('-m','--nwm-catchments', help='NWM catchments', required=True) + parser.add_argument('-y','--nhd-headwaters',help='NHD headwaters',required=True) + parser.add_argument('-v','--landsea',help='LandSea - land boundary',required=True) + parser.add_argument('-c','--subset-nhd-streams',help='NHD streams subset',required=True) + parser.add_argument('-z','--subset-nld-lines',help='Subset of NLD levee vectors for HUC',required=True) + parser.add_argument('-a','--subset-lakes',help='NWM lake subset',required=True) + parser.add_argument('-n','--subset-catchments',help='NWM catchments subset',required=True) + parser.add_argument('-e','--subset-nhd-headwaters',help='NHD headwaters subset',required=True,default=None) + parser.add_argument('-b','--subset-nwm-streams',help='NWM streams subset',required=True) + parser.add_argument('-x','--subset-landsea',help='LandSea subset',required=True) + parser.add_argument('-extent','--extent',help='FIM extent',required=True) + parser.add_argument('-gl','--great-lakes-filename',help='Great Lakes layer',required=True) + parser.add_argument('-wb','--wbd-buffer-distance',help='WBD Mask buffer distance',required=True,type=int) + parser.add_argument('-lb','--lake-buffer-distance',help='Great Lakes Mask buffer distance',required=True,type=int) + + args = vars(parser.parse_args()) + + hucCode = args['hucCode'] + nwm_streams_filename = args['nwm_streams'] + nhd_streams_filename = args['nhd_streams'] + nwm_lakes_filename = args['nwm_lakes'] + nld_lines_filename = args['nld_lines'] + wbd_filename = args['wbd'] + wbd_buffer_filename = args['wbd_buffer'] + nwm_catchments_filename = args['nwm_catchments'] + nhd_headwaters_filename = args['nhd_headwaters'] + landsea_filename = args['landsea'] + subset_nhd_streams_filename = args['subset_nhd_streams'] + subset_nld_lines_filename = args['subset_nld_lines'] + subset_nwm_lakes_filename = args['subset_lakes'] + subset_nwm_catchments_filename = args['subset_catchments'] + subset_nhd_headwaters_filename = args['subset_nhd_headwaters'] + subset_nwm_streams_filename = args['subset_nwm_streams'] + subset_landsea_filename = args['subset_landsea'] + extent = args['extent'] + great_lakes_filename = args['great_lakes_filename'] + wbd_buffer_distance = args['wbd_buffer_distance'] + lake_buffer_distance = args['lake_buffer_distance'] + + 
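Editor's sketch: a recurring pattern in subset_vector_layers is removing interior rings ("donut holes") from lake polygons by rebuilding each polygon from its exterior only. A small shapely/geopandas illustration, assuming an already-loaded GeoDataFrame of possibly donut-shaped polygons and a hypothetical CRS.

import geopandas as gpd
from shapely.geometry import Polygon

def fill_holes(polygons_gdf):
    # Rebuild each polygon from its exterior ring only, dropping interior rings (holes)
    polygons_gdf = polygons_gdf.explode().reset_index(drop=True)
    polygons_gdf["geometry"] = [Polygon(geom.exterior) for geom in polygons_gdf.geometry]
    return polygons_gdf

# Example: a 10x10 square with a 2x2 hole becomes a solid square
donut = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)],
                holes=[[(4, 4), (6, 4), (6, 6), (4, 6)]])
gdf = gpd.GeoDataFrame({"geometry": [donut]}, crs="EPSG:5070")
print(fill_holes(gdf).area.iloc[0])  # 100.0 (the donut area was 96.0)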
subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance) diff --git a/lib/derive_headwaters.py b/src/derive_headwaters.py similarity index 88% rename from lib/derive_headwaters.py rename to src/derive_headwaters.py index e0c39f6b5..1bae7209e 100644 --- a/lib/derive_headwaters.py +++ b/src/derive_headwaters.py @@ -1,7 +1,9 @@ -#!/usr/bin/env·python3 +#!/usr/bin/env python3 import geopandas as gpd from shapely.geometry import Point +from utils.shared_functions import getDriver +import argparse def findHeadWaterPoints(flows): @@ -32,12 +34,6 @@ def findHeadWaterPoints(flows): return(hw_gdf) -def getDriver(fileName): - - driverDictionary = {'.gpkg' : 'GPKG','.geojson' : 'GeoJSON','.shp' : 'ESRI Shapefile'} - driver = driverDictionary[splitext(fileName)[1]] - - return(driver) if __name__ == '__main__': @@ -52,5 +48,6 @@ def getDriver(fileName): hw_gdf = findHeadWaterPoints(flows) + #TODO check output_headwaters variable, not defined if output_headwaters is not None: hw_gdf.to_file(args['output_headwaters'],driver=getDriver(args['output_headwaters'])) diff --git a/lib/entrypoint.sh b/src/entrypoint.sh similarity index 100% rename from lib/entrypoint.sh rename to src/entrypoint.sh diff --git a/src/filter_catchments_and_add_attributes.py b/src/filter_catchments_and_add_attributes.py new file mode 100755 index 000000000..17aa5c611 --- /dev/null +++ b/src/filter_catchments_and_add_attributes.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 + +import argparse +import geopandas as gpd +import numpy as np +import sys +from utils.shared_variables import FIM_ID +from utils.shared_functions import mem_profile + + +@mem_profile +def filter_catchments_and_add_attributes(input_catchments_filename, input_flows_filename, output_catchments_filename, output_flows_filename, wbd_filename, huc_code): + input_catchments = gpd.read_file(input_catchments_filename) + wbd = gpd.read_file(wbd_filename) + input_flows = gpd.read_file(input_flows_filename) + + # filter segments within huc boundary + select_flows = tuple(map(str,map(int,wbd[wbd.HUC8.str.contains(huc_code)][FIM_ID]))) + + if input_flows.HydroID.dtype != 'str': input_flows.HydroID = input_flows.HydroID.astype(str) + output_flows = input_flows[input_flows.HydroID.str.startswith(select_flows)].copy() + if output_flows.HydroID.dtype != 'int': output_flows.HydroID = output_flows.HydroID.astype(int) + + if len(output_flows) > 0: + + # merges input flows attributes and filters hydroids + if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) + output_catchments = input_catchments.merge(output_flows.drop(['geometry'],axis=1),on='HydroID') + + # filter out smaller duplicate features + duplicateFeatures = np.where(np.bincount(output_catchments['HydroID'])>1)[0] + + for dp in duplicateFeatures: + + indices_of_duplicate = np.where(output_catchments['HydroID'] == dp)[0] + areas = output_catchments.iloc[indices_of_duplicate,:].geometry.area + indices_of_smaller_duplicates = indices_of_duplicate[np.where(areas != np.amax(areas))[0]] + output_catchments = 
output_catchments.drop(output_catchments.index[indices_of_smaller_duplicates]) + + # add geometry column + output_catchments['areasqkm'] = output_catchments.geometry.area/(1000**2) + + output_catchments.to_file(output_catchments_filename, driver="GPKG",index=False) + output_flows.to_file(output_flows_filename, driver="GPKG", index=False) + + +if __name__ == '__main__': + + input_catchments_filename = sys.argv[1] + input_flows_filename = sys.argv[2] + output_catchments_filename = sys.argv[3] + output_flows_filename = sys.argv[4] + wbd_filename = sys.argv[5] + huc_code = str(sys.argv[6]) + + # Parse arguments. + parser = argparse.ArgumentParser(description='filter_catchments_and_add_attributes.py') + parser.add_argument('-i', '--input-catchments-filename', help='input-catchments-filename', required=True) + parser.add_argument('-f', '--input-flows-filename', help='input-flows-filename', required=True) + parser.add_argument('-c', '--output-catchments-filename', help='output-catchments-filename', required=True) + parser.add_argument('-o', '--output-flows-filename', help='output-flows-filename', required=True) + parser.add_argument('-w', '--wbd-filename', help='wbd-filename', required=True) + parser.add_argument('-u', '--huc-code', help='huc-code', required=True) + + # Extract to dictionary and assign to variables. + args = vars(parser.parse_args()) + + filter_catchments_and_add_attributes(**args) diff --git a/src/fr_to_ms_raster_mask.py b/src/fr_to_ms_raster_mask.py new file mode 100755 index 000000000..2ebc713f0 --- /dev/null +++ b/src/fr_to_ms_raster_mask.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 + +''' + Description: Mask raster layers using 'mainstems' stream buffer +''' + +import sys +import os +import argparse +import geopandas as gpd +import rasterio.mask +from utils.shared_functions import mem_profile + + +@mem_profile +def fr_to_ms_raster_mask(ms_buffer_dist, split_flows_filename, fdr_fr, dem_fr, slope_fr, fdr_ms_filename, dem_ms_filename, slope_ms_filename, str_pixel_fr, str_pixel_ms_filename): + # create output layer names + split_flows = gpd.read_file(split_flows_filename) + + # Limit the rasters to the buffer distance around the draft streams. 
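+ # The same mask / update-profile / write pattern is repeated below for the DEM, flow direction, slope, and stream pixel rasters.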
+ print ("Limiting rasters to buffer area ({} meters) around model streams".format(str(ms_buffer_dist))) + + split_flows_ms_buffer = split_flows.unary_union.buffer(ms_buffer_dist) + + print('Writing raster outputs ...') + + # Mask nhddem + with rasterio.open(dem_fr) as src: + out_image, out_transform = rasterio.mask.mask(src, [split_flows_ms_buffer], crop=True) + out_meta = src.meta + + out_meta.update({"driver": "GTiff", + "height": out_image.shape[1], + "width": out_image.shape[2], + "transform": out_transform}) + + with rasterio.open(os.path.join(os.path.dirname(dem_fr), dem_ms_filename), "w", **out_meta) as dest: + dest.write(out_image) + + # Mask fdr + with rasterio.open(fdr_fr) as src: + out_image, out_transform = rasterio.mask.mask(src, [split_flows_ms_buffer], crop=True) + out_meta = src.meta + + out_meta.update({"driver": "GTiff", + "height": out_image.shape[1], + "width": out_image.shape[2], + "transform": out_transform}) + + with rasterio.open(os.path.join(os.path.dirname(fdr_fr), fdr_ms_filename), "w", **out_meta) as dest: + dest.write(out_image) + + # Mask slope + with rasterio.open(slope_fr) as src: + out_image, out_transform = rasterio.mask.mask(src, [split_flows_ms_buffer], crop=True) + out_meta = src.meta + + out_meta.update({"driver": "GTiff", + "height": out_image.shape[1], + "width": out_image.shape[2], + "transform": out_transform}) + + with rasterio.open(os.path.join(os.path.dirname(slope_fr), slope_ms_filename), "w", **out_meta) as dest: + dest.write(out_image) + + # Mask stream pixels + with rasterio.open(str_pixel_fr) as src: + out_image, out_transform = rasterio.mask.mask(src, [split_flows_ms_buffer], crop=True) + out_meta = src.meta + + out_meta.update({"driver": "GTiff", + "height": out_image.shape[1], + "width": out_image.shape[2], + "transform": out_transform}) + + with rasterio.open(os.path.join(os.path.dirname(str_pixel_fr), str_pixel_ms_filename), "w", **out_meta) as dest: + dest.write(out_image) + + +if __name__ == '__main__': + ms_buffer_dist = int(os.environ['ms_buffer_dist']) + + # Parse arguments. + parser = argparse.ArgumentParser(description='fr_to_ms_raster_mask.py') + parser.add_argument('-s', '--split-flows-filename', help='split-flows-filename', required=True) + parser.add_argument('-f', '--fdr-fr', help='fdr-fr', required=True) + parser.add_argument('-d', '--dem-fr', help='dem-fr', required=True) + parser.add_argument('-r', '--slope-fr', help='slope-fr', required=True) + parser.add_argument('-m', '--fdr-ms-filename', help='fdr-ms-filename', required=True) + parser.add_argument('-n', '--dem-ms-filename', help='dem-ms-filename', required=True) + parser.add_argument('-o', '--slope-ms-filename', help='slope-ms-filename', required=True) + parser.add_argument('-p', '--str-pixel-fr', help='str-pixel-fr', required=True) + parser.add_argument('-q', '--str-pixel-ms-filename', help='str-pixel-ms-filename', required=True) + + # Extract to dictionary and assign to variables. 
+ args = vars(parser.parse_args()) + + fr_to_ms_raster_mask(ms_buffer_dist, **args) diff --git a/lib/getRasterInfoNative.py b/src/getRasterInfoNative.py similarity index 98% rename from lib/getRasterInfoNative.py rename to src/getRasterInfoNative.py index 27c1a3e73..11e93e322 100755 --- a/lib/getRasterInfoNative.py +++ b/src/getRasterInfoNative.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 +#TODO standardize this script + import gdal import os.path import sys @@ -10,6 +12,7 @@ read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy <<< $(./getRasterInfoNative.py ) """ + def GetExtent(gt,cols,rows): ''' Return list of corner coordinates from a geotransform @@ -35,6 +38,7 @@ def GetExtent(gt,cols,rows): yarr.reverse() return ext + def ReprojectCoords(coords,src_srs,tgt_srs): ''' Reproject a list of x,y coordinates. diff --git a/lib/get_all_huc_in_inputs.py b/src/get_all_huc_in_inputs.py similarity index 100% rename from lib/get_all_huc_in_inputs.py rename to src/get_all_huc_in_inputs.py index 78be2ce7b..af507b10b 100755 --- a/lib/get_all_huc_in_inputs.py +++ b/src/get_all_huc_in_inputs.py @@ -8,6 +8,7 @@ from os.path import join from tqdm import tqdm + def find_unique_hucs(inputsDir,hucLength): # get file list with glob @@ -24,7 +25,6 @@ def find_unique_hucs(inputsDir,hucLength): unique_hucs.to_csv(join(inputsDir,'included_huc{}.lst'.format(hucLength)),header=False,index=False) - if __name__ == '__main__': parser = argparse.ArgumentParser(description='Get unique HUCs in results data dir') parser.add_argument('-i','--inputs-directory',help='Basins polygons to use within project path',required=True) diff --git a/src/identify_src_bankfull.py b/src/identify_src_bankfull.py new file mode 100755 index 000000000..3be19fae1 --- /dev/null +++ b/src/identify_src_bankfull.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 + +import os +import sys +import pandas as pd +import argparse +import matplotlib.pyplot as plt +import seaborn as sns +from functools import reduce +from multiprocessing import Pool +from os.path import isfile, join, dirname, isdir +import shutil +import warnings +from pathlib import Path +import datetime as dt +sns.set_theme(style="whitegrid") +warnings.simplefilter(action='ignore', category=FutureWarning) + +""" + Identify the SRC bankfull stage values using the NWM 1.5yr flows + + Parameters + ---------- + fim_dir : str + Directory containing FIM output folders. + bankfull_flow_dir : str + Directory containing "bankfull" flows files (e.g. NWM 1.5yr recurr). + number_of_jobs : str + Number of jobs. 
+ plots : str + Flag to create SRC plots for all hydroids (True/False) +""" + +def src_bankfull_lookup(args): + + src_full_filename = args[0] + src_modify_filename = args[1] + df_bflows = args[2] + huc = args[3] + src_plot_option = args[4] + huc_output_dir = args[5] + + ## Read the src_full_crosswalked.csv + #print('Processing: ' + str(huc)) + log_text = 'Calculating: ' + str(huc) + '\n' + df_src = pd.read_csv(src_full_filename,dtype={'HydroID': int,'feature_id': int}) + + ## NWM recurr rename discharge var + df_bflows = df_bflows.rename(columns={'discharge':'discharge_1_5'}) + + ## Combine the nwm 1.5yr flows into the SRC via feature_id + df_src = df_src.merge(df_bflows,how='left',on='feature_id') + + ## Check if there are any missing data, negative or zero flow values in the discharge_1_5 + check_null = df_src['discharge_1_5'].isnull().sum() + if check_null > 0: + log_text += 'Missing feature_id in crosswalk for huc: ' + str(huc) + ' --> these featureids will be ignored in bankfull calcs (~' + str(check_null/84) + ' features) \n' + ## Fill missing/nan nwm discharge_1_5 values with -999 to handle later + df_src['discharge_1_5'] = df_src['discharge_1_5'].fillna(-999) + negative_flows = len(df_src.loc[(df_src.discharge_1_5 <= 0) & (df_src.discharge_1_5 != -999)]) + if negative_flows > 0: + log_text += 'HUC: ' + str(huc) + ' --> Negative or zero flow values found (likely lakeid loc)\n' + + ## Define the channel geometry variable names to use from the src + hradius_var = 'HydraulicRadius (m)' + volume_var = 'Volume (m3)' + + ## Locate the closest SRC discharge value to the NWM 1.5yr flow + df_src['Q_1_5_find'] = (df_src['discharge_1_5'] - df_src['Discharge (m3s-1)']).abs() + + ## Check for any missing/null entries in the input SRC + if df_src['Q_1_5_find'].isnull().values.any(): # there may be null values for lake or coastal flow lines (need to set a value to do groupby idxmin below) + log_text += 'HUC: ' + str(huc) + ' --> Null values found in "Q_1_5_find" calc. These will be filled with 999999 () \n' + ## Fill missing/nan nwm 'Discharge (m3s-1)' values with 999999 to handle later + df_src['Q_1_5_find'] = df_src['Q_1_5_find'].fillna(999999) + if df_src['HydroID'].isnull().values.any(): + log_text += 'HUC: ' + str(huc) + ' --> Null values found in "HydroID"... 
\n' + + df_1_5 = df_src[['Stage','HydroID',volume_var,hradius_var,'Q_1_5_find']] # create new subset df to perform the Q_1_5 lookup + df_1_5 = df_1_5[df_1_5['Stage'] > 0.0] # Ensure bankfull stage is greater than stage=0 + df_1_5.reset_index(drop=True, inplace=True) + df_1_5 = df_1_5.loc[df_1_5.groupby('HydroID')['Q_1_5_find'].idxmin()].reset_index(drop=True) # find the index of the Q_1_5_find (closest matching flow) + df_1_5 = df_1_5.rename(columns={'Stage':'Stage_1_5',volume_var:'Volume_bankfull',hradius_var:'HRadius_bankfull'}) # rename volume to use later for channel portion calc + df_src = df_src.merge(df_1_5[['Stage_1_5','HydroID','Volume_bankfull','HRadius_bankfull']],how='left',on='HydroID') + df_src.drop(['Q_1_5_find'], axis=1, inplace=True) + + ## Calculate the channel portion of bankfull Volume + df_src['chann_volume_ratio'] = 1.0 # At stage=0 set channel_ratio to 1.0 (avoid div by 0) + df_src['chann_volume_ratio'].where(df_src['Stage'] == 0, df_src['Volume_bankfull'] / (df_src[volume_var]),inplace=True) + #df_src['chann_volume_ratio'] = df_src['chann_volume_ratio'].clip_upper(1.0) + df_src['chann_volume_ratio'].where(df_src['chann_volume_ratio'] <= 1.0, 1.0, inplace=True) # set > 1.0 ratio values to 1.0 (these are within the channel) + df_src['chann_volume_ratio'].where(df_src['discharge_1_5'] > 0.0, 0.0, inplace=True) # if the discharge_1_5 value <= 0 then set channel ratio to 0 (will use global overbank manning n) + #df_src.drop(['Volume_bankfull'], axis=1, inplace=True) + + ## Calculate the channel portion of bankfull Hydraulic Radius + df_src['chann_hradius_ratio'] = 1.0 # At stage=0 set channel_ratio to 1.0 (avoid div by 0) + df_src['chann_hradius_ratio'].where(df_src['Stage'] == 0, df_src['HRadius_bankfull'] / (df_src[hradius_var]),inplace=True) + #df_src['chann_hradius_ratio'] = df_src['HRadius_bankfull'] / (df_src[hradius_var]+.0001) # old adding 0.01 to avoid dividing by 0 at stage=0 + df_src['chann_hradius_ratio'].where(df_src['chann_hradius_ratio'] <= 1.0, 1.0, inplace=True) # set > 1.0 ratio values to 1.0 (these are within the channel) + df_src['chann_hradius_ratio'].where(df_src['discharge_1_5'] > 0.0, 0.0, inplace=True) # if the discharge_1_5 value <= 0 then set channel ratio to 0 (will use global overbank manning n) + #df_src.drop(['HRadius_bankfull'], axis=1, inplace=True) + + ## mask bankfull variables when the 1.5yr flow value is <= 0 + df_src['Stage_1_5'].mask(df_src['discharge_1_5'] <= 0.0,inplace=True) + + ## Create a new column to identify channel/floodplain via the bankfull stage value + df_src.loc[df_src['Stage'] <= df_src['Stage_1_5'], 'channel_fplain_1_5'] = 'channel' + df_src.loc[df_src['Stage'] > df_src['Stage_1_5'], 'channel_fplain_1_5'] = 'floodplain' + df_src['channel_fplain_1_5'] = df_src['channel_fplain_1_5'].fillna('channel') + + ## Output new SRC with bankfull column + df_src.to_csv(src_modify_filename,index=False) + log_text += 'Completed: ' + str(huc) + + ## plot rating curves (optional arg) + if src_plot_option == 'True': + if isdir(huc_output_dir) == False: + os.mkdir(huc_output_dir) + generate_src_plot(df_src, huc_output_dir) + + return(log_text) + +def generate_src_plot(df_src, plt_out_dir): + + ## create list of unique hydroids + hydroids = df_src.HydroID.unique().tolist() + #hydroids = [17820017] + + for hydroid in hydroids: + print("Creating SRC plot: " + str(hydroid)) + plot_df = df_src.loc[df_src['HydroID'] == hydroid] + + fig, axes = plt.subplots(1,2,figsize=(12, 6)) + fig.suptitle(str(hydroid)) + axes[0].set_title('Rating Curve w/ 
Bankfull') + axes[1].set_title('Channel Volume vs. HRadius Ratio') + sns.despine(fig, left=True, bottom=True) + sns.scatterplot(x='Discharge (m3s-1)', y='Stage', data=plot_df, ax=axes[0]) + sns.lineplot(x='Discharge (m3s-1)', y='Stage_1_5', data=plot_df, color='green', ax=axes[0]) + axes[0].fill_between(plot_df['Discharge (m3s-1)'], plot_df['Stage_1_5'],alpha=0.5) + axes[0].text(plot_df['Discharge (m3s-1)'].median(), plot_df['Stage_1_5'].median(), "NWM 1.5yr: " + str(plot_df['Stage_1_5'].median())) + sns.scatterplot(x='chann_volume_ratio', y='Stage', data=plot_df, ax=axes[1], label="chann_volume_ratio", s=38) + sns.scatterplot(x='chann_hradius_ratio', y='Stage', data=plot_df, ax=axes[1], label="chann_hradius_ratio", s=12) + axes[1].legend() + plt.savefig(plt_out_dir + os.sep + str(hydroid) + '_bankfull.png',dpi=100, bbox_inches='tight') + plt.close() + +def multi_process(src_bankfull_lookup, procs_list): + ## Initiate multiprocessing + print(f"Identifying bankfull stage for {len(procs_list)} hucs using {number_of_jobs} jobs") + with Pool(processes=number_of_jobs) as pool: + map_output = pool.map(src_bankfull_lookup, procs_list) + log_file.writelines(["%s\n" % item for item in map_output]) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Identify bankfull stage for each hydroid synthetic rating curve') + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) + parser.add_argument('-flows','--bankfull-flow-input',help='NWM recurrence flows dir',required=True,type=str) + parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) + parser.add_argument('-plots','--src-plot-option',help='Optional (True or False): use this flag to create src plots for all hydroids. 
WARNING - long runtime',required=False,default='False',type=str) + + args = vars(parser.parse_args()) + + fim_dir = args['fim_dir'] + bankfull_flow_filepath = args['bankfull_flow_input'] + number_of_jobs = args['number_of_jobs'] + src_plot_option = args['src_plot_option'] + procs_list = [] + + ## Print message to user and initiate run clock + print('Writing progress to log file here: ' + str(join(fim_dir,'bankfull_detect.log'))) + print('This may take a few minutes...') + ## Create a time var to log run time + begin_time = dt.datetime.now() + + ## Check that the bankfull flow filepath exists and read to dataframe + if not isfile(bankfull_flow_filepath): + print('!!!ERROR: Cannot find the input bankfull flow file: ' + str(bankfull_flow_filepath)) + else: + df_bflows = pd.read_csv(bankfull_flow_filepath,dtype={'feature_id': int}) + huc_list = os.listdir(fim_dir) + huc_pass_list = [] + for huc in huc_list: + if huc != 'logs' and huc[-3:] != 'log' and huc[-4:] != '.csv': + src_barc_full_filename = join(fim_dir,huc,'src_full_crosswalked_BARC.csv') + src_orig_full_filename = join(fim_dir,huc,'src_full_crosswalked.csv') + src_modify_filename = join(fim_dir,huc,'src_full_crosswalked_bankfull.csv') + huc_output_dir = join(fim_dir,huc,'src_plots') + ## check if BARC modified src_full_crosswalked_BARC.csv exists otherwise use the original src_full_crosswalked.csv + if isfile(src_barc_full_filename): + print(str(huc)) + huc_pass_list.append(str(huc) + " --> src_full_crosswalked_BARC.csv") + procs_list.append([src_barc_full_filename, src_modify_filename, df_bflows, huc, src_plot_option, huc_output_dir]) + elif isfile(src_orig_full_filename): + print(str(huc)) + huc_pass_list.append(str(huc) + " --> src_full_crosswalked.csv") + procs_list.append([src_orig_full_filename, src_modify_filename, df_bflows, huc, src_plot_option, huc_output_dir]) + else: + print(str(huc) + ' WARNING --> cannot find the SRC crosswalked csv file in the fim output dir: ' + str(join(fim_dir,huc)) + ' - skipping this HUC!!!\n') + + ## initiate log file + print(f"Identifying bankfull stage for {len(procs_list)} hucs using {number_of_jobs} jobs") + sys.__stdout__ = sys.stdout + log_file = open(join(fim_dir,'logs','log_bankfull_identify.log'),"w") + sys.stdout = log_file + log_file.write('START TIME: ' + str(begin_time) + '\n') + log_file.writelines(["%s\n" % item for item in huc_pass_list]) + log_file.write('#########################################################\n\n') + + ## Pass huc procs_list to multiprocessing function + multi_process(src_bankfull_lookup, procs_list) + + ## Record run time and close log file + end_time = dt.datetime.now() + log_file.write('END TIME: ' + str(end_time) + '\n') + tot_run_time = end_time - begin_time + log_file.write('TOTAL RUN TIME: ' + str(tot_run_time)) + sys.stdout = sys.__stdout__ + log_file.close() diff --git a/src/make_stages_and_catchlist.py b/src/make_stages_and_catchlist.py new file mode 100755 index 000000000..524360915 --- /dev/null +++ b/src/make_stages_and_catchlist.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 + +import argparse +import geopandas as gpd +import numpy as np +import sys +from utils.shared_functions import mem_profile + + +@mem_profile +def make_stages_and_catchlist(flows_filename, catchments_filename, stages_filename, catchlist_filename, stages_min, stages_interval, stages_max): + flows = gpd.read_file(flows_filename) + catchments = gpd.read_file(catchments_filename) + + + hydroIDs = flows['HydroID'].tolist() + len_of_hydroIDs = len(hydroIDs) + slopes = 
flows['S0'].tolist() + lengthkm = flows['LengthKm'].tolist() + areasqkm = catchments['areasqkm'].tolist() + + + stages_max = stages_max + stages_interval + stages = np.round(np.arange(stages_min,stages_max,stages_interval),4) + + with open(stages_filename,'w') as f: + f.write("Stage\n") + for stage in stages: + f.write("{}\n".format(stage)) + + with open(catchlist_filename,'w') as f: + f.write("{}\n".format(len_of_hydroIDs)) + for h,s,l,a in zip(hydroIDs,slopes,lengthkm,areasqkm): + f.write("{} {} {} {}\n".format(h,s,l,a)) + + #TODO we need a main block +if __name__ == '__main__': + # Parse arguments. + parser = argparse.ArgumentParser(description='make_stages_and_catchlist.py') + parser.add_argument('-f', '--flows-filename', help='flows-filename',required=True) + parser.add_argument('-c', '--catchments-filename', help='catchments-filename',required=True) + parser.add_argument('-s', '--stages-filename', help='stages-filename',required=True) + parser.add_argument('-a', '--catchlist-filename', help='catchlist-filename',required=True) + parser.add_argument('-m', '--stages-min', help='stages-min',required=True,type=float) + parser.add_argument('-i', '--stages-interval', help='stages-interval',required=True,type=float) + parser.add_argument('-t', '--stages-max', help='stages-max',required=True,type=float) + + # Extract to dictionary and assign to variables. + args = vars(parser.parse_args()) + + make_stages_and_catchlist(**args) diff --git a/lib/output_cleanup.py b/src/output_cleanup.py similarity index 71% rename from lib/output_cleanup.py rename to src/output_cleanup.py index 98dcb9044..2491f05a5 100755 --- a/lib/output_cleanup.py +++ b/src/output_cleanup.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 import os -import csv -import json -import shutil import argparse +from utils.shared_functions import mem_profile + +@mem_profile def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_production, viz_post_processing): ''' Processes all the final output files to cleanup and add post-processing @@ -27,17 +27,33 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod production_whitelist = [ 'rem_zeroed_masked.tif', 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg', + 'demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg', 'gw_catchments_reaches_filtered_addedAttributes.tif', 'hydroTable.csv', - 'src.json' + 'src.json', + 'small_segments.csv', + 'bathy_crosswalk_calcs.csv', + 'bathy_stream_order_calcs.csv', + 'bathy_thalweg_flag.csv', + 'bathy_xs_area_hydroid_lookup.csv', + 'src_full_crosswalked.csv', + 'usgs_elev_table.csv', + 'hand_ref_elev_table.csv', ] # List of files that will be saved during a viz run viz_whitelist = [ 'rem_zeroed_masked.tif', + 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg', + 'demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg', 'gw_catchments_reaches_filtered_addedAttributes.tif', 'hydroTable.csv', - 'src.json' + 'src.json', + 'small_segments.csv', + 'src_full_crosswalked.csv', + 'demDerived_reaches_split_points.gpkg', + 'flowdir_d8_burned_filled.tif', + 'dem_thalwegCond.tif' ] # If "production" run, only keep whitelisted files @@ -49,26 +65,8 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod # Step 1, keep only files that Viz needs whitelist_directory(output_folder_path, viz_whitelist, additional_whitelist) - # Step 2, add feature_id to src.json and rename file - # Open src.json for writing feature_ids to - src_data = {} - with 
open(os.path.join(output_folder_path, 'src.json')) as jsonf: - src_data = json.load(jsonf) - - with open(os.path.join(output_folder_path, 'hydroTable.csv')) as csvf: - csvReader = csv.DictReader(csvf) - for row in csvReader: - if row['HydroID'].lstrip('0') in src_data and 'nwm_feature_id' not in src_data[row['HydroID'].lstrip('0')]: - src_data[row['HydroID'].lstrip('0')]['nwm_feature_id'] = row['feature_id'] - - # Write src_data to JSON file - with open(os.path.join(output_folder_path, f'rating_curves_{huc_number}.json'), 'w') as jsonf: - json.dump(src_data, jsonf) - - # Step 3, copy files to desired names - shutil.copy(os.path.join(output_folder_path, 'rem_zeroed_masked.tif'), os.path.join(output_folder_path, f'hand_grid_{huc_number}.tif')) - shutil.copy(os.path.join(output_folder_path, 'gw_catchments_reaches_filtered_addedAttributes.tif'), os.path.join(output_folder_path, f'catchments_{huc_number}.tif')) +@mem_profile def whitelist_directory(directory_path, whitelist, additional_whitelist): # Add any additional files to the whitelist that the user wanted to keep if additional_whitelist: @@ -78,7 +76,7 @@ def whitelist_directory(directory_path, whitelist, additional_whitelist): directory = os.fsencode(directory_path) for file in os.listdir(directory_path): filename = os.fsdecode(file) - if filename not in whitelist: + if filename not in whitelist: os.remove(os.path.join(directory_path, filename)) @@ -100,7 +98,6 @@ def whitelist_directory(directory_path, whitelist, additional_whitelist): additional_whitelist = args['additional_whitelist'] is_production = args['is_production'] is_viz_post_processing = args['is_viz_post_processing'] - + # Run output_cleanup output_cleanup(huc_number, output_folder_path, additional_whitelist, is_production, is_viz_post_processing) - \ No newline at end of file diff --git a/src/preprocess_rasters.py b/src/preprocess_rasters.py new file mode 100755 index 000000000..635e1f227 --- /dev/null +++ b/src/preprocess_rasters.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 + +import os +import sys +sys.path.append('/foss_fim/src') +from multiprocessing import Pool +import argparse +from utils.reproject_dem import reproject_dem +from utils.shared_functions import update_raster_profile +from utils.shared_variables import PREP_PROJECTION, PREP_PROJECTION_CM + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Reproject Elevation rasters and update profile') + parser.add_argument('-dem_dir','--dem-dir', help='DEM filename', required=True,type=str) + parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. 
Default is 1.',required=False, default="1",type=int) + parser.add_argument('-nodata','--nodata-val', help='DEM nodata value', required=False,type=float,default=-9999.0) + parser.add_argument('-block','--blocksize', help='DEM blocksize', required=False,type=int,default=512) + parser.add_argument('-keep','--keep-intermediate', help='keep intermediate files', required=False,type=bool,default=True) + + args = vars(parser.parse_args()) + + dem_dir = args['dem_dir'] + number_of_jobs = args['number_of_jobs'] + nodata_val = args['nodata_val'] + blocksize = args['blocksize'] + keep_intermediate = args['keep_intermediate'] + + reproject_procs_list = [] + + for huc in os.listdir(dem_dir): + raster_dir = os.path.join(dem_dir,huc) + elev_cm = os.path.join(raster_dir, 'elev_cm.tif') + elev_cm_proj = os.path.join(raster_dir, 'elev_cm_proj.tif') + reproject_procs_list.append([raster_dir, elev_cm, elev_cm_proj, PREP_PROJECTION_CM]) + + # Multiprocess reprojection + with Pool(processes=number_of_jobs) as pool: + pool.map(reproject_dem, reproject_procs_list) + + profile_procs_list = [] + + for huc in os.listdir(dem_dir): + elev_m_tif = os.path.join(dem_dir,huc, 'elev_m.tif') + if not os.path.exists(elev_m_tif): + raster_dir = os.path.join(dem_dir,huc) + elev_cm_proj = os.path.join(raster_dir, 'elev_cm_proj.tif') + elev_m = os.path.join(raster_dir, 'elev_m.tif') + profile_procs_list.append([elev_cm_proj, elev_m,PREP_PROJECTION,nodata_val,blocksize,keep_intermediate]) + + # Multiprocess update profile + with Pool(processes=2) as pool: + # TODO read in windows because gdal rasters are massive + pool.map(update_raster_profile, profile_procs_list) diff --git a/lib/r_grow_distance.py b/src/r_grow_distance.py similarity index 99% rename from lib/r_grow_distance.py rename to src/r_grow_distance.py index 793ff8600..d42e88c0d 100755 --- a/lib/r_grow_distance.py +++ b/src/r_grow_distance.py @@ -5,6 +5,7 @@ import grass.script as gscript import argparse + def r_grow_distance(input_raster, grass_workspace, proximity_dtype, allocation_dtype): ''' Runs the r.grow.distance GRASS gis tool which given an input raster will produce an output proximity (or distance) and euclidian allocation tool. @@ -76,6 +77,7 @@ def r_grow_distance(input_raster, grass_workspace, proximity_dtype, allocation_d return output_proximity_path,output_allocation_path + if __name__ == '__main__': #Parse arguments diff --git a/src/reachID_grid_to_vector_points.py b/src/reachID_grid_to_vector_points.py new file mode 100755 index 000000000..849256953 --- /dev/null +++ b/src/reachID_grid_to_vector_points.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 + +import numpy as np +import argparse +import geopandas as gpd +from utils.shared_variables import PREP_PROJECTION +from shapely.geometry import Point +import rasterio +from utils.shared_functions import getDriver, mem_profile + +""" +USAGE: +./reachID_grid_to_vector_points.py + +""" + +@mem_profile +def convert_grid_cells_to_points(raster,index_option,output_points_filename=False): + + # Input raster + if isinstance(raster,str): + raster = rasterio.open(raster,'r') + + elif isinstance(raster,rasterio.io.DatasetReader): + pass + + else: + raise TypeError("Pass raster dataset or filepath for raster") + + (upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = raster.get_transform() + indices = np.nonzero(raster.read(1) >= 1) + + id =[None] * len(indices[0]);points = [None]*len(indices[0]) + + # Iterate over the Numpy points. 
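+ # Cell centers are derived from the geotransform (upper-left corner plus half a cell in x and y); in 'reachID' mode the id combines the sampled raster value with a running counter (value*10000 + i), while 'featureID'/'pixelID' use the counter alone.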
+ i = 1 + for y_index,x_index in zip(*indices): + x = x_index * x_size + upper_left_x + (x_size / 2) # add half the cell size + y = y_index * y_size + upper_left_y + (y_size / 2) # to center the point + points[i-1] = Point(x,y) + if index_option == 'reachID': + reachID = np.array(list(raster.sample((Point(x,y).coords), indexes=1))).item() # check this; needs to add raster cell value + index + id[i-1] = reachID*10000 + i #reachID + i/100 + elif (index_option == 'featureID') |(index_option == 'pixelID'): + id[i-1] = i + i += 1 + + pointGDF = gpd.GeoDataFrame({'id' : id, 'geometry' : points},crs=PREP_PROJECTION,geometry='geometry') + + if output_points_filename == False: + return pointGDF + else: + pointGDF.to_file(output_points_filename,driver=getDriver(output_points_filename),index=False) + + +if __name__ == '__main__': + + # Parse arguments + parser = argparse.ArgumentParser(description='Converts a raster to points') + parser.add_argument('-r','--raster',help='Raster to be converted to points',required=True,type=str) + parser.add_argument('-i', '--index-option',help='Indexing option',required=True,type=str,choices=['reachID','featureID','pixelID']) + parser.add_argument('-p', '--output-points-filename',help='Output points layer filename',required=False,type=str,default=False) + + args = vars(parser.parse_args()) + + raster = args['raster'] + index_option = args['index_option'] + output_points_filename = args['output_points_filename'] + + convert_grid_cells_to_points(raster,index_option,output_points_filename) diff --git a/src/reduce_nhd_stream_density.py b/src/reduce_nhd_stream_density.py new file mode 100644 index 000000000..e11472746 --- /dev/null +++ b/src/reduce_nhd_stream_density.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python3 + +import geopandas as gpd +import pandas as pd +import numpy as np +from collections import deque +import argparse +import pygeos +from shapely.wkb import dumps +from shapely.geometry import Point +from utils.shared_functions import getDriver + + +def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag=False): + + headwater_streams = pd.DataFrame() + + if mainstem_flag == False: + nhd_streams = gpd.read_file(nhd_streams_) + headwater_col = 'is_headwater' + id_col = 'headwaters_id' + n = -1 + else: + nhd_streams = nhd_streams_.copy() + headwater_col = 'mainstem' + id_col = 'nws_lid' + n = '' + + # Locate the closest NHDPlus HR stream segment to NWM headwater points. 
Done by HUC8 to reduce processing time and to contain NWM headwater in the same HUC + for index, row in selected_wbd8.iterrows(): + huc = row["HUC8"] + + # Double check that this is a nested HUC + if huc.startswith(str(huc4)): + + huc8_mask = selected_wbd8.loc[selected_wbd8.HUC8==huc] + huc8_mask = huc8_mask.reset_index(drop=True) + + # Masking headwaters by HUC8 + headwaters_mask = gpd.read_file(headwaters_filename, mask = huc8_mask) + headwaters_mask = headwaters_mask.reset_index(drop=True) + + # Masking subset streams by HUC8 + if mainstem_flag == False: + streams_subset = gpd.read_file(nhd_streams_, mask = huc8_mask) + else: + streams_subset = nhd_streams.loc[nhd_streams.HUC8==huc].copy() + if headwaters_mask.is_headwater.dtype != 'int': headwaters_mask.is_headwater = headwaters_mask.is_headwater.astype('int') + if headwaters_mask.is_colocated.dtype != 'int': headwaters_mask.is_colocated = headwaters_mask.is_colocated.astype('int') + headwaters_mask = headwaters_mask.loc[headwaters_mask.is_headwater==True] + + if not streams_subset.empty: + streams_subset[headwater_col] = False + streams_subset = streams_subset.reset_index(drop=True) + + # Create WKB geometry column + streams_subset['b_geom'] = None + for index, linestring in enumerate(streams_subset.geometry): + streams_subset.at[index, 'b_geom'] = dumps(linestring) + + # Create pygeos nhd stream geometries from WKB representation + streambin_geom = pygeos.io.from_wkb(streams_subset['b_geom']) + + # Add HUC8 column + streams_subset['HUC8'] = str(huc) + + # Add headwaters_id column + streams_subset[id_col] = n + distance_from_upstream = {} + for index, point in headwaters_mask.iterrows(): + + # Convert headwater point geometries to WKB representation + wkb_point = dumps(point.geometry) + + # Create pygeos headwater point geometries from WKB representation + pointbin_geom = pygeos.io.from_wkb(wkb_point) + + # Distance to each stream segment + distances = pygeos.measurement.distance(streambin_geom, pointbin_geom) + + # Find minimum distance + min_index = np.argmin(distances) + headwater_point_name = point[headwater_id] + + # Find stream segment closest to headwater point + if mainstem_flag==True: + + if point.is_colocated==True: + + closest_stream = streams_subset.iloc[min_index] + distance_to_line = point.geometry.distance(Point(closest_stream.geometry.coords[-1])) + print(f"{point.nws_lid} distance on line {closest_stream.NHDPlusID}: {np.round(distance_to_line,1)}") + + if not closest_stream.NHDPlusID in distance_from_upstream.keys(): + distance_from_upstream[closest_stream.NHDPlusID] = [point.nws_lid,distance_to_line] + + elif distance_from_upstream[closest_stream.NHDPlusID][1] > distance_to_line: + distance_from_upstream[closest_stream.NHDPlusID] = [point.nws_lid,distance_to_line] + + headwater_point_name = distance_from_upstream[closest_stream.NHDPlusID][0] + + # Closest segment to headwater + streams_subset.loc[min_index,headwater_col] = True + streams_subset.loc[min_index,id_col] = headwater_point_name + + headwater_streams = headwater_streams.append(streams_subset[['NHDPlusID',headwater_col,id_col,'HUC8']]) + + headwater_streams = headwater_streams.sort_values(headwater_col, ascending=False).drop_duplicates('NHDPlusID') # keeps headwater=True for conflicting duplicates + + if mainstem_flag == False: + nhd_streams = nhd_streams.merge(headwater_streams,on='NHDPlusID',how='inner') + else: + headwater_streams = headwater_streams.drop(columns=['HUC8']) + nhd_streams = nhd_streams.merge(headwater_streams,on='NHDPlusID',how='outer') + 
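+ # For the mainstem pass the outer merge keeps every NHDPlus segment; the fillna calls below assign the sentinel id and a 0 flag to segments without a matched headwater.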
nhd_streams[id_col] = nhd_streams[id_col].fillna(n) + nhd_streams[headwater_col] = nhd_streams[headwater_col].fillna(0) + + del selected_wbd8, streams_subset, headwater_streams + + huc4_mask_buffer = huc4_mask.buffer(10) + + # Identify inflowing streams + nwm_intersections = gpd.read_file(nwm_intersections_filename, mask=huc4_mask_buffer) + + if mainstem_flag == False: + nhd_streams['downstream_of_headwater'] = False + nhd_streams['is_relevant_stream'] = nhd_streams['is_headwater'].copy() + else: + nwm_intersections = nwm_intersections.loc[nwm_intersections.mainstem==1] + + nhd_streams = nhd_streams.explode() + nhd_streams = nhd_streams.reset_index(drop=True) + + + + # Find stream segment closest to nwm intersection point + for index, point in nwm_intersections.iterrows(): + + # Distance to each stream segment + distances = nhd_streams.distance(point.geometry) + + # Find minimum distance + min_index = np.argmin(distances) + + # Update attributes for incoming stream + nhd_streams.loc[min_index,headwater_col] = True + + if mainstem_flag == False: + nhd_streams.loc[min_index,'downstream_of_headwater'] = True + nhd_streams['is_relevant_stream'] = nhd_streams[headwater_col].copy() + + # Trace down from headwaters + nhd_streams.set_index('NHDPlusID',inplace=True,drop=False) + + nhd_streams = get_downstream_segments(nhd_streams.copy(),headwater_col,mainstem_flag) + + # nhd_streams.fillna(value = {"is_relevant_stream": False}, inplace=True) + nhd_streams = nhd_streams.loc[nhd_streams['is_relevant_stream'],:] + nhd_streams.reset_index(drop=True,inplace=True) + + return nhd_streams + + +def get_downstream_segments(streams, attribute,mainstem_flag): + + Q = deque(streams.loc[streams[attribute],'NHDPlusID'].tolist()) + visited = set() + + while Q: + q = Q.popleft() + if q in visited: + continue + + visited.add(q) + toNode,DnLevelPat = streams.loc[q,['ToNode','DnLevelPat']] + + try: + downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() + except ValueError: # 18050002 has duplicate nhd stream feature + if len(toNode.unique()) == 1: + toNode = toNode.iloc[0] + downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() + + # If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. 
exclude segments that are diversions) + if len(set(downstream_ids))>1: # special case: remove duplicate NHDPlusIDs + relevant_ids = [segment for segment in downstream_ids if DnLevelPat == streams.loc[segment,'LevelPathI']] + else: + relevant_ids = downstream_ids + + if mainstem_flag == False: + + streams.loc[relevant_ids,'is_relevant_stream'] = True + streams.loc[relevant_ids,'downstream_of_headwater'] = True + else: + streams.loc[relevant_ids,'mainstem'] = True + + for i in relevant_ids: + if i not in visited: + Q.append(i) + + return streams + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Reduce NHDPlus HR network based on headwater points') + parser.add_argument('-n','--huc-number',help='HUC number',required=True,type=str) + parser.add_argument('-b','--huc4-mask',help='HUC4 mask',required=True) + parser.add_argument('-w','--selected-wbd8',help='WBD8 layer',required=True) + parser.add_argument('-t','--nhd-streams',help='NHDPlus HR geodataframe',required=True) + parser.add_argument('-a','--headwaters-filename',help='Headwaters points layer name',required=True,type=str) + parser.add_argument('-s','--subset-nhd-streams-fileName',help='Output streams layer name',required=False,type=str,default=None) + parser.add_argument('-i','--headwater-id',help='Headwater points ID column',required=True) + parser.add_argument('-c','--nwm-intersections-filename',help='NWM HUC4 intersection points',required=True) + parser.add_argument('-d','--mainstem-flag',help='flag for mainstems network',required=False,default=False) + + args = vars(parser.parse_args()) + + huc_number = args['huc_number'] + huc4_mask = args['huc4_mask'] + selected_wbd8 = args['selected_wbd8'] + nhd_streams = args['nhd_streams'] + headwaters_filename = args['headwaters_filename'] + subset_nhd_streams_fileName = args['subset_nhd_streams_fileName'] + headwater_id = args['headwater_id'] + nwm_intersections_filename = args['nwm_intersections_filename'] + mainstem_flag = args['mainstem_flag'] + + subset_streams_gdf = subset_nhd_network(huc_number,huc4_mask,selected_wbd8,nhd_streams,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag=mainstem_flag) + + if subset_nhd_streams_fileName is not None: + subset_streams_gdf.to_file(subset_nhd_streams_fileName,driver=getDriver(subset_nhd_streams_fileName),index=False) diff --git a/src/rem.py b/src/rem.py new file mode 100755 index 000000000..c064710ed --- /dev/null +++ b/src/rem.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 + +from numba import njit, typed, types +import rasterio +import numpy as np +import argparse +import pandas as pd +import geopandas as gpd +from utils.shared_functions import getDriver, mem_profile + + +@mem_profile +def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, dem_reaches_filename): + """ + Calculates REM/HAND/Detrended DEM + + Parameters + ---------- + dem_fileName : str + File name of pit filled DEM raster. + pixel_watersheds_fileName : str + File name of stream pixel watersheds raster. + rem_fileName : str + File name of output relative elevation raster. + hydroid_fileName : str + File name of the hydroid raster (i.e. 
gw_catchments_reaches.tif) + dem_reaches_filename + File name of the reaches layer to populate HAND elevation attribute values and overwrite as output + + """ + + # ------------------------------------------- Get catchment_hydroid_dict --------------------------------------------------- # + # The following creates a dictionary of the catchment ids (key) and their hydroid along the thalweg (value). + # This is needed to produce a HAND zero reference elevation by hydroid dataframe (helpful for evaluating rating curves & bathy properties) + @njit + def make_catchment_hydroid_dict(flat_value_raster, catchment_hydroid_dict, flat_catchments, thalweg_window): + + for i,cm in enumerate(flat_catchments): + if thalweg_window[i] == 1: # Only allow reference hydroid to be within thalweg. + catchment_hydroid_dict[cm] = flat_value_raster[i] + return(catchment_hydroid_dict) + + # Open files. + gw_catchments_pixels_masked_object = rasterio.open(pixel_watersheds_fileName) + hydroid_pixels_object = rasterio.open(hydroid_fileName) + thalweg_raster_object = rasterio.open(thalweg_raster) + + # Specify raster object metadata. + meta = hydroid_pixels_object.meta.copy() + meta['tiled'], meta['compress'] = True, 'lzw' + + # -- Create catchment_hydroid_dict -- # + catchment_hydroid_dict = typed.Dict.empty(types.int64,types.int64) # Initialize an empty dictionary to store the catchment hydroid. + # Update catchment_hydroid_dict with each pixel sheds hydroid. + # Creating dictionary containing catchment ids (key) and corresponding hydroid within the thalweg... + for ji, window in hydroid_pixels_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template + hydroid_window = hydroid_pixels_object.read(1,window=window).ravel() # Define hydroid_window + catchments_window = gw_catchments_pixels_masked_object.read(1,window=window).ravel() # Define catchments_window + thalweg_window = thalweg_raster_object.read(1, window=window).ravel() # Define cost_window + + # Call numba-optimized function to update catchment_hydroid_dict with pixel sheds overlapping hydroid. + catchment_hydroid_dict = make_catchment_hydroid_dict(hydroid_window, catchment_hydroid_dict, catchments_window, thalweg_window) + + hydroid_pixels_object.close() + gw_catchments_pixels_masked_object.close() + thalweg_raster_object.close() + # ------------------------------------------- Get catchment_min_dict --------------------------------------------------- # + # The following creates a dictionary of the catchment ids (key) and their elevation along the thalweg (value). + @njit + def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalweg_window): + + for i,cm in enumerate(flat_catchments): + if thalweg_window[i] == 1: # Only allow reference elevation to be within thalweg + # If the catchment really exists in the dictionary, compare elevation values + if (cm in catchment_min_dict): + if (flat_dem[i] < catchment_min_dict[cm]): + # If the flat_dem's elevation value is less than the catchment_min_dict min, update the catchment_min_dict min + catchment_min_dict[cm] = flat_dem[i] + else: + catchment_min_dict[cm] = flat_dem[i] + return(catchment_min_dict) + + # Open files. + gw_catchments_pixels_masked_object = rasterio.open(pixel_watersheds_fileName) + dem_thalwegCond_masked_object = rasterio.open(dem_fileName) + thalweg_raster_object = rasterio.open(thalweg_raster) + + # Specify raster object metadata. 
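+ # This profile (tiled, LZW-compressed) is reused later when the output REM raster is opened for writing.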
+ meta = dem_thalwegCond_masked_object.meta.copy() + meta['tiled'], meta['compress'] = True, 'lzw' + + # -- Create catchment_min_dict -- # + catchment_min_dict = typed.Dict.empty(types.int64,types.float32) # Initialize an empty dictionary to store the catchment minimums + # Update catchment_min_dict with pixel sheds minimum. + # Creating dictionary containing catchment ids (key) and corresponding elevation within the thalweg (value)... + for ji, window in dem_thalwegCond_masked_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template + dem_window = dem_thalwegCond_masked_object.read(1,window=window).ravel() # Define dem_window + catchments_window = gw_catchments_pixels_masked_object.read(1,window=window).ravel() # Define catchments_window + thalweg_window = thalweg_raster_object.read(1, window=window).ravel() # Define thalweg_window + + # Call numba-optimized function to update catchment_min_dict with pixel sheds minimum. + catchment_min_dict = make_catchment_min_dict(dem_window, catchment_min_dict, catchments_window, thalweg_window) + + dem_thalwegCond_masked_object.close() + gw_catchments_pixels_masked_object.close() + thalweg_raster_object.close() + + # Merge and export dictionary to csv. + catchment_min_dict_df = pd.DataFrame.from_dict(catchment_min_dict, orient='index') # convert dict to dataframe + catchment_min_dict_df.columns = ['Median_Thal_Elev_m'] + catchment_hydroid_dict_df = pd.DataFrame.from_dict(catchment_hydroid_dict, orient='index') # convert dict to dataframe + catchment_hydroid_dict_df.columns = ['HydroID'] + merge_df = catchment_hydroid_dict_df.merge(catchment_min_dict_df, left_index=True, right_index=True) + merge_df.index.name = 'pixelcatch_id' + + # Merge the HAND reference elevation by HydroID dataframe with the demDerived_reaches layer (add new layer attribute) + min_by_hydroid = merge_df.groupby(['HydroID']).min() # min value of all med_thal_elev for pixel catchments in each HydroID reach + min_by_hydroid.columns = ['min_thal_elev'] + med_by_hydroid = merge_df.groupby(['HydroID']).median() # median value of all med_thal_elev for pixel catchments in each HydroID reach + med_by_hydroid.columns = ['med_thal_elev'] + max_by_hydroid = merge_df.groupby(['HydroID']).max() # max value of all med_thal_elev for pixel catchments in each HydroID reach + max_by_hydroid.columns = ['max_thal_elev'] + input_reaches = gpd.read_file(dem_reaches_filename) + input_reaches = input_reaches.merge(min_by_hydroid, on='HydroID') # merge dataframes by HydroID variable + input_reaches = input_reaches.merge(med_by_hydroid, on='HydroID') # merge dataframes by HydroID variable + input_reaches = input_reaches.merge(max_by_hydroid, on='HydroID') # merge dataframes by HydroID variable + input_reaches.to_file(dem_reaches_filename,driver=getDriver(dem_reaches_filename),index=False) + # ------------------------------------------------------------------------------------------------------------------------ # + + + # ------------------------------------------- Produce relative elevation model ------------------------------------------- # + @njit + def calculate_rem(flat_dem,catchmentMinDict,flat_catchments,ndv): + rem_window = np.zeros(len(flat_dem),dtype=np.float32) + for i,cm in enumerate(flat_catchments): + if cm in catchmentMinDict: + if catchmentMinDict[cm] == ndv: + rem_window[i] = ndv + else: + rem_window[i] = flat_dem[i] - catchmentMinDict[cm] + + return(rem_window) + + rem_rasterio_object = rasterio.open(rem_fileName,'w',**meta) # Open rem_rasterio_object for 
writing to rem_fileName. + pixel_catchments_rasterio_object = rasterio.open(pixel_watersheds_fileName) # Open pixel_catchments_rasterio_object + dem_rasterio_object = rasterio.open(dem_fileName) + + # Producing relative elevation model raster + for ji, window in dem_rasterio_object.block_windows(1): + dem_window = dem_rasterio_object.read(1,window=window) + window_shape = dem_window.shape + + dem_window = dem_window.ravel() + catchments_window = pixel_catchments_rasterio_object.read(1,window=window).ravel() + + rem_window = calculate_rem(dem_window, catchment_min_dict, catchments_window, meta['nodata']) + rem_window = rem_window.reshape(window_shape).astype(np.float32) + + rem_rasterio_object.write(rem_window, window=window, indexes=1) + + dem_rasterio_object.close() + pixel_catchments_rasterio_object.close() + rem_rasterio_object.close() + # ------------------------------------------------------------------------------------------------------------------------ # + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Relative elevation from pixel based watersheds') + parser.add_argument('-d','--dem', help='DEM to use within project path', required=True) + parser.add_argument('-w','--watersheds',help='Pixel based watersheds raster to use within project path',required=True) + parser.add_argument('-t','--thalweg-raster',help='A binary raster representing the thalweg. 1 for thalweg, 0 for non-thalweg.',required=True) + parser.add_argument('-o','--rem',help='Output REM raster',required=True) + parser.add_argument('-i','--hydroid', help='HydroID raster to use within project path', required=True) + parser.add_argument('-s','--dem_reaches_in_out',help='DEM derived reach layer to join HAND reference elevation attribute',required=True) + + # Extract arguments to dictionary. + args = vars(parser.parse_args()) + + # Define variable inputs. 
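+ # Illustrative invocation (placeholder paths): python3 rem.py -d dem_thalwegCond.tif -w gw_catchments_pixels.tif -t demDerived_streamPixels.tif -o rem.tif -i gw_catchments_reaches.tif -s demDerived_reaches_split.gpkg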
+ dem_fileName = args['dem'] + pixel_watersheds_fileName = args['watersheds'] + rem_fileName = args['rem'] + thalweg_raster = args['thalweg_raster'] + hydroid_fileName = args['hydroid'] + dem_reaches_filename = args['dem_reaches_in_out'] + + rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, dem_reaches_filename) diff --git a/lib/run_by_unit.sh b/src/run_by_unit.sh similarity index 68% rename from lib/run_by_unit.sh rename to src/run_by_unit.sh index d2c63a208..b46f7ce9a 100755 --- a/lib/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -5,8 +5,12 @@ T_total_start echo -e $startDiv"Parameter Values" echo -e "extent=$extent" -echo -e "negativeBurnValue=$negativeBurnValue" -echo -e "maxSplitDistance_meters=$maxSplitDistance_meters" +echo -e "agree_DEM_buffer=$agree_DEM_buffer" +echo -e "wbd_buffer=$wbd_buffer" +echo -e "ms_buffer_dist=$ms_buffer_dist" +echo -e "lakes_buffer_dist_meters=$lakes_buffer_dist_meters" +echo -e "negative_burn_value=$negative_burn_value" +echo -e "max_split_distance_meters=$max_split_distance_meters" echo -e "mannings_n=$manning_n" echo -e "stage_min_meters=$stage_min_meters" echo -e "stage_interval_meters=$stage_interval_meters" @@ -15,7 +19,7 @@ echo -e "slope_min=$slope_min" echo -e "ms_buffer_dist=$ms_buffer_dist" echo -e "ncores_gw=$ncores_gw" echo -e "ncores_fd=$ncores_fd" -echo -e "defaultMaxJobs=$defaultMaxJobs" +echo -e "default_max_jobs=$default_max_jobs" echo -e "memfree=$memfree"$stopDiv ## SET OUTPUT DIRECTORY FOR UNIT ## @@ -28,11 +32,13 @@ hucUnitLength=${#hucNumber} huc4Identifier=${hucNumber:0:4} huc2Identifier=${hucNumber:0:2} input_NHD_WBHD_layer=WBDHU$hucUnitLength -input_DEM=$inputDataDir/nhdplus_rasters/HRNHDPlusRasters"$huc4Identifier"/elev_cm.tif +input_DEM=$inputDataDir/nhdplus_rasters/HRNHDPlusRasters"$huc4Identifier"/elev_m.tif input_NLD=$inputDataDir/nld_vectors/huc2_levee_lines/nld_preprocessed_"$huc2Identifier".gpkg +input_bathy_bankfull=$inputDataDir/$bankfull_input_table + # Define the landsea water body mask using either Great Lakes or Ocean polygon input # if [[ $huc2Identifier == "04" ]] ; then - input_LANDSEA=$inputDataDir/landsea/gl_water_polygons.gpkg + input_LANDSEA=$input_GL_boundaries echo -e "Using $input_LANDSEA for water body mask (Great Lakes)" else input_LANDSEA=$inputDataDir/landsea/water_polygons_us.gpkg @@ -46,25 +52,17 @@ Tstart ogr2ogr -f GPKG $outputHucDataDir/wbd.gpkg $input_WBD_gdb $input_NHD_WBHD_layer -where "HUC$hucUnitLength='$hucNumber'" Tcount -## BUFFER WBD ## -echo -e $startDiv"Buffer WBD $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/wbd_buffered.gpkg ] && \ -ogr2ogr -f GPKG -dialect sqlite -sql "select ST_buffer(geom, 5000) from 'WBDHU$hucUnitLength'" $outputHucDataDir/wbd_buffered.gpkg $outputHucDataDir/wbd.gpkg -Tcount - -## GET STREAMS ## +## Subset Vector Layers ## echo -e $startDiv"Get Vector Layers and Subset $hucNumber"$stopDiv date -u Tstart [ ! 
-f $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg ] && \ -$libDir/snap_and_clip_to_nhd.py -d $hucNumber -w $input_NWM_Flows -f $input_NWM_Headwaters -s $input_NHD_Flowlines -l $input_NWM_Lakes -r $input_NLD -u $outputHucDataDir/wbd.gpkg -g $outputHucDataDir/wbd_buffered.gpkg -y $inputDataDir/ahp_sites/ahps_sites.gpkg -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -t $outputHucDataDir/nwm_headwaters_proj_subset.gpkg -m $input_NWM_Catchments -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -p $extent +python3 -m memory_profiler $srcDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_nwm_flows -s $input_nhd_flowlines -l $input_nwm_lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_nwm_catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -extent $extent -gl $input_GL_boundaries -lb $lakes_buffer_dist_meters -wb $wbd_buffer Tcount if [ "$extent" = "MS" ]; then - if [[ ! -f $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg ]] ; then - echo "No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" + if [[ ! -f $outputHucDataDir/nhd_headwater_points_subset.gpkg ]] ; then + echo "EXIT FLAG!! (exit 55): No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" rm -rf $outputHucDataDir exit 0 fi @@ -81,30 +79,22 @@ Tcount echo -e $startDiv"Clip DEM $hucNumber"$stopDiv date -u Tstart -[ ! -f $outputHucDataDir/dem.tif ] && \ -gdalwarp -cutline $outputHucDataDir/wbd_buffered.gpkg -crop_to_cutline -ot Int32 -r bilinear -of "GTiff" -overwrite -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "TILED=YES" -co "COMPRESS=LZW" -co "BIGTIFF=YES" $input_DEM $outputHucDataDir/dem.tif +[ ! -f $outputHucDataDir/dem_meters.tif ] && \ +gdalwarp -cutline $outputHucDataDir/wbd_buffered.gpkg -crop_to_cutline -ot Float32 -r bilinear -of "GTiff" -overwrite -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "TILED=YES" -co "COMPRESS=LZW" -co "BIGTIFF=YES" $input_DEM $outputHucDataDir/dem_meters.tif Tcount ## GET RASTER METADATA echo -e $startDiv"Get DEM Metadata $hucNumber"$stopDiv date -u Tstart -read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($libDir/getRasterInfoNative.py $outputHucDataDir/dem.tif) +read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputHucDataDir/dem_meters.tif) ## RASTERIZE NLD MULTILINES ## echo -e $startDiv"Rasterize all NLD multilines using zelev vertices"$stopDiv date -u Tstart [ ! 
-f $outputHucDataDir/nld_rasterized_elev.tif ] && [ -f $outputHucDataDir/nld_subset_levees.gpkg ] && \ -gdal_rasterize -l nld_subset_levees -3d -at -init $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputHucDataDir/nld_rasterized_elev.tif -Tcount - -## CONVERT TO METERS ## -echo -e $startDiv"Convert DEM to Meters $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/dem_meters.tif ] && \ -gdal_calc.py --quiet --type=Float32 --co "BLOCKXSIZE=512" --co "BLOCKYSIZE=512" --co "TILED=YES" --co "COMPRESS=LZW" --co "BIGTIFF=YES" -A $outputHucDataDir/dem.tif --outfile="$outputHucDataDir/dem_meters.tif" --calc="A/100" --NoDataValue=$ndv +gdal_rasterize -l nld_subset_levees -3d -at -a_nodata $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputHucDataDir/nld_rasterized_elev.tif Tcount ## RASTERIZE REACH BOOLEAN (1 & 0) ## @@ -138,18 +128,17 @@ echo -e $startDiv"Burn nld levees into dem & convert nld elev to meters (*Overwr date -u Tstart [ -f $outputHucDataDir/nld_rasterized_elev.tif ] && \ -gdal_calc.py --quiet --type=Float32 --overwrite --NoDataValue $ndv --co "BLOCKXSIZE=512" --co "BLOCKYSIZE=512" --co "TILED=YES" --co "COMPRESS=LZW" --co "BIGTIFF=YES" -A $outputHucDataDir/dem_meters.tif -B $outputHucDataDir/nld_rasterized_elev.tif --outfile="$outputHucDataDir/dem_meters.tif" --calc="maximum(A,(B*0.3048))" --NoDataValue=$ndv +python3 -m memory_profiler $srcDir/burn_in_levees.py -dem $outputHucDataDir/dem_meters.tif -nld $outputHucDataDir/nld_rasterized_elev.tif -out $outputHucDataDir/dem_meters.tif Tcount ## DEM Reconditioning ## -# Using AGREE methodology, hydroenforce the DEM so that it is consistent -# with the supplied stream network. This allows for more realistic catchment -# delineation which is ultimately reflected in the output FIM mapping. -echo -e $startDiv"Creating AGREE DEM using $buffer meter buffer"$stopDiv +# Using AGREE methodology, hydroenforce the DEM so that it is consistent with the supplied stream network. +# This allows for more realistic catchment delineation which is ultimately reflected in the output FIM mapping. +echo -e $startDiv"Creating AGREE DEM using $agree_DEM_buffer meter buffer"$stopDiv date -u Tstart [ ! 
-f $outputHucDataDir/dem_burned.tif ] && \ -$libDir/agreedem.py -r $outputHucDataDir/flows_grid_boolean.tif -d $outputHucDataDir/dem_meters.tif -w $outputHucDataDir -g $outputHucDataDir/temp_work -o $outputHucDataDir/dem_burned.tif -b $buffer -sm 10 -sh 1000 +python3 -m memory_profiler $srcDir/agreedem.py -r $outputHucDataDir/flows_grid_boolean.tif -d $outputHucDataDir/dem_meters.tif -w $outputHucDataDir -g $outputHucDataDir/temp_work -o $outputHucDataDir/dem_burned.tif -b $agree_DEM_buffer -sm 10 -sh 1000 Tcount ## PIT REMOVE BURNED DEM ## @@ -194,14 +183,14 @@ Tcount echo -e $startDiv"Preprocessing for lateral thalweg adjustment $hucNumber"$stopDiv date -u Tstart -$libDir/unique_pixel_and_allocation.py -s $outputHucDataDir/demDerived_streamPixels.tif -o $outputHucDataDir/demDerived_streamPixels_ids.tif -g $outputHucDataDir/temp_grass +python3 -m memory_profiler $srcDir/unique_pixel_and_allocation.py -s $outputHucDataDir/demDerived_streamPixels.tif -o $outputHucDataDir/demDerived_streamPixels_ids.tif -g $outputHucDataDir/temp_grass Tcount ## ADJUST THALWEG MINIMUM USING LATERAL ZONAL MINIMUM ## echo -e $startDiv"Performing lateral thalweg adjustment $hucNumber"$stopDiv date -u Tstart -$libDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif +python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif -th $thalweg_lateral_elev_threshold Tcount ## MASK BURNED DEM FOR STREAMS ONLY ### @@ -240,11 +229,11 @@ echo -e $startDiv"Split Derived Reaches $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/demDerived_reaches_split.gpkg ] && \ -$libDir/split_flows.py $outputHucDataDir/demDerived_reaches.shp $outputHucDataDir/dem_thalwegCond.tif $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/demDerived_reaches_split_points.gpkg $maxSplitDistance_meters $slope_min $outputHucDataDir/wbd8_clp.gpkg $outputHucDataDir/nwm_lakes_proj_subset.gpkg $lakes_buffer_dist_meters +python3 -m memory_profiler $srcDir/split_flows.py -f $outputHucDataDir/demDerived_reaches.shp -d $outputHucDataDir/dem_thalwegCond.tif -s $outputHucDataDir/demDerived_reaches_split.gpkg -p $outputHucDataDir/demDerived_reaches_split_points.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -l $outputHucDataDir/nwm_lakes_proj_subset.gpkg Tcount if [[ ! -f $outputHucDataDir/demDerived_reaches_split.gpkg ]] ; then - echo "No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" + echo "EXIT FLAG!! (exit 56): No AHPs point(s) within HUC $hucNumber boundaries. 
Aborting run_by_unit.sh" rm -rf $outputHucDataDir exit 0 fi @@ -254,11 +243,11 @@ if [ "$extent" = "MS" ]; then echo -e $startDiv"Mask Rasters with Stream Buffer $hucNumber"$stopDiv date -u Tstart - $libDir/fr_to_ms_raster_mask.py $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/flowdir_d8_burned_filled.tif $outputHucDataDir/dem_thalwegCond.tif $outputHucDataDir/slopes_d8_dem_meters.tif $outputHucDataDir/flowdir_d8_MS.tif $outputHucDataDir/dem_thalwegCond_MS.tif $outputHucDataDir/slopes_d8_dem_metersMS.tif $outputHucDataDir/demDerived_streamPixels.tif $outputHucDataDir/demDerived_streamPixelsMS.tif $ms_buffer_dist + python3 -m memory_profiler $srcDir/fr_to_ms_raster_mask.py -s $outputHucDataDir/demDerived_reaches_split.gpkg -f $outputHucDataDir/flowdir_d8_burned_filled.tif -d $outputHucDataDir/dem_thalwegCond.tif -r $outputHucDataDir/slopes_d8_dem_meters.tif -m $outputHucDataDir/flowdir_d8_MS.tif -n $outputHucDataDir/dem_thalwegCond_MS.tif -o $outputHucDataDir/slopes_d8_dem_metersMS.tif -p $outputHucDataDir/demDerived_streamPixels.tif -q $outputHucDataDir/demDerived_streamPixelsMS.tif Tcount if [[ ! -f $outputHucDataDir/dem_thalwegCond_MS.tif ]] ; then - echo "No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" + echo "EXIT FLAG!! (exit 57): No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" rm -rf $outputHucDataDir exit 0 fi @@ -287,7 +276,7 @@ echo -e $startDiv"Vectorize Pixel Centroids $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/flows_points_pixels.gpkg ] && \ -$libDir/reachID_grid_to_vector_points.py $demDerived_streamPixels $outputHucDataDir/flows_points_pixels.gpkg featureID +python3 -m memory_profiler $srcDir/reachID_grid_to_vector_points.py -r $demDerived_streamPixels -i featureID -p $outputHucDataDir/flows_points_pixels.gpkg Tcount ## GAGE WATERSHED FOR PIXELS ## @@ -303,7 +292,7 @@ echo -e $startDiv"D8 REM $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/rem.tif ] && \ -$libDir/rem.py -d $dem_thalwegCond -w $outputHucDataDir/gw_catchments_pixels.tif -o $outputHucDataDir/rem.tif -t $demDerived_streamPixels +python3 -m memory_profiler $srcDir/rem.py -d $dem_thalwegCond -w $outputHucDataDir/gw_catchments_pixels.tif -o $outputHucDataDir/rem.tif -t $demDerived_streamPixels -i $outputHucDataDir/gw_catchments_reaches.tif -s $outputHucDataDir/demDerived_reaches_split.gpkg Tcount ## DINF DISTANCE DOWN ## @@ -335,10 +324,10 @@ echo -e $startDiv"Process catchments and model streams step 1 $hucNumber"$stopDi date -u Tstart [ ! -f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg ] && \ -$libDir/filter_catchments_and_add_attributes.py $outputHucDataDir/gw_catchments_reaches.gpkg $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg $outputHucDataDir/demDerived_reaches_split_filtered.gpkg $outputHucDataDir/wbd8_clp.gpkg $hucNumber +python3 -m memory_profiler $srcDir/filter_catchments_and_add_attributes.py -i $outputHucDataDir/gw_catchments_reaches.gpkg -f $outputHucDataDir/demDerived_reaches_split.gpkg -c $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -o $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -u $hucNumber if [[ ! -f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg ]] ; then - echo "No relevant streams within HUC $hucNumber boundaries. Aborting run_by_unit.sh" + echo "EXIT FLAG!! 
(exit 65): No relevant streams within HUC $hucNumber boundaries. Aborting run_by_unit.sh" rm -rf $outputHucDataDir exit 0 fi @@ -348,7 +337,7 @@ Tcount echo -e $startDiv"Get Clipped Raster Metadata $hucNumber"$stopDiv date -u Tstart -read fsize ncols nrows ndv_clipped xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($libDir/getRasterInfoNative.py $outputHucDataDir/gw_catchments_reaches.tif) +read fsize ncols nrows ndv_clipped xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputHucDataDir/gw_catchments_reaches.tif) Tcount ## RASTERIZE NEW CATCHMENTS AGAIN ## @@ -395,7 +384,7 @@ Tcount echo -e $startDiv"Generate Catchment List and Stage List Files $hucNumber"$stopDiv date -u Tstart -$libDir/make_stages_and_catchlist.py $outputHucDataDir/demDerived_reaches_split_filtered.gpkg $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg $outputHucDataDir/stage.txt $outputHucDataDir/catchment_list.txt $stage_min_meters $stage_interval_meters $stage_max_meters +python3 -m memory_profiler $srcDir/make_stages_and_catchlist.py -f $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -c $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -s $outputHucDataDir/stage.txt -a $outputHucDataDir/catchment_list.txt -m $stage_min_meters -i $stage_interval_meters -t $stage_max_meters Tcount ## HYDRAULIC PROPERTIES ## @@ -407,11 +396,18 @@ $taudemDir/catchhydrogeo -hand $outputHucDataDir/rem_zeroed_masked.tif -catch $o Tcount ## FINALIZE CATCHMENTS AND MODEL STREAMS ## -echo -e $startDiv"Finalize catchments and model streams $hucNumber"$stopDiv +echo -e $startDiv"Finalize catchments and model streams $hucNumber"$stopDiv date -u Tstart [ !
-f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg ] && \ -$libDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_NWM_Catchments -p $extent +python3 -m memory_profiler $srcDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_nwm_catchments -p $extent -k $outputHucDataDir/small_segments.csv +Tcount + +## USGS CROSSWALK ## +echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv +date -u +Tstart +python3 -m memory_profiler $srcDir/usgs_gage_crosswalk.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv -e $extent Tcount ## CLEANUP OUTPUTS ## @@ -422,5 +418,5 @@ args=() (( viz == 1 )) && args+=( '-v' ) date -u Tstart -$libDir/output_cleanup.py $hucNumber $outputHucDataDir "${args[@]}" +python3 -m memory_profiler $srcDir/output_cleanup.py $hucNumber $outputHucDataDir "${args[@]}" Tcount diff --git a/src/split_flows.py b/src/split_flows.py new file mode 100755 index 000000000..4dff74fe0 --- /dev/null +++ b/src/split_flows.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python3 + +''' +Description: + 1) split stream segments based on lake boundaries and input threshold distance + 2) calculate channel slope, manning's n, and LengthKm for each segment + 3) create unique ids using HUC8 boundaries (and unique FIM_ID column) + 4) create network traversal attribute columns (To_Node, From_Node, NextDownID) + 5) create points layer with segment verticies encoded with HydroID's (used for catchment delineation in next step) +''' + +import sys +import geopandas as gpd +import pandas as pd +from shapely.geometry import Point, LineString, MultiPoint +import rasterio +import numpy as np +import argparse +from tqdm import tqdm +import time +from os.path import isfile +from os import remove,environ +from collections import OrderedDict +import build_stream_traversal +from utils.shared_functions import getDriver, mem_profile +from utils.shared_variables import FIM_ID + +@mem_profile +def split_flows(max_length, slope_min, lakes_buffer_input, flows_filename, 
dem_filename, split_flows_filename, split_points_filename, wbd8_clp_filename, lakes_filename): + wbd = gpd.read_file(wbd8_clp_filename) + + toMetersConversion = 1e-3 + + print('Loading data ...') + flows = gpd.read_file(flows_filename) + + if not len(flows) > 0: + print ("No relevant streams within HUC boundaries.") + sys.exit(0) + + wbd8 = gpd.read_file(wbd8_clp_filename) + dem = rasterio.open(dem_filename,'r') + + if isfile(lakes_filename): + lakes = gpd.read_file(lakes_filename) + else: + lakes = None + + wbd8 = wbd8.filter(items=[FIM_ID, 'geometry']) + wbd8 = wbd8.set_index(FIM_ID) + flows = flows.explode() + + # temp + flows = flows.to_crs(wbd8.crs) + + split_flows = [] + slopes = [] + hydro_id = 'HydroID' + + # split at HUC8 boundaries + print ('splitting stream segments at HUC8 boundaries') + flows = gpd.overlay(flows, wbd8, how='union').explode().reset_index(drop=True) + + # check for lake features + if lakes is not None: + if len(lakes) > 0: + print ('splitting stream segments at ' + str(len(lakes)) + ' waterbodies') + #create splits at lake boundaries + lakes = lakes.filter(items=['newID', 'geometry']) + lakes = lakes.set_index('newID') + flows = gpd.overlay(flows, lakes, how='union').explode().reset_index(drop=True) + lakes_buffer = lakes.copy() + lakes_buffer['geometry'] = lakes.buffer(lakes_buffer_input) # adding X meter buffer for spatial join comparison (currently using 20meters) + + print ('splitting ' + str(len(flows)) + ' stream segments based on ' + str(max_length) + ' m max length') + + # remove empty geometries + flows = flows.loc[~flows.is_empty,:] + + for i,lineString in tqdm(enumerate(flows.geometry),total=len(flows.geometry)): + # Reverse geometry order (necessary for BurnLines) + lineString = LineString(lineString.coords[::-1]) + + # skip lines of zero length + if lineString.length == 0: + continue + + # existing reaches of less than max_length + if lineString.length < max_length: + split_flows = split_flows + [lineString] + line_points = [point for point in zip(*lineString.coords.xy)] + + # Calculate channel slope + start_point = line_points[0]; end_point = line_points[-1] + start_elev,end_elev = [i[0] for i in rasterio.sample.sample_gen(dem,[start_point,end_point])] + slope = float(abs(start_elev - end_elev) / lineString.length) + if slope < slope_min: + slope = slope_min + slopes = slopes + [slope] + continue + + splitLength = lineString.length / np.ceil(lineString.length / max_length) + + cumulative_line = [] + line_points = [] + last_point = [] + + last_point_in_entire_lineString = list(zip(*lineString.coords.xy))[-1] + + for point in zip(*lineString.coords.xy): + + cumulative_line = cumulative_line + [point] + line_points = line_points + [point] + numberOfPoints_in_cumulative_line = len(cumulative_line) + + if last_point: + cumulative_line = [last_point] + cumulative_line + numberOfPoints_in_cumulative_line = len(cumulative_line) + elif numberOfPoints_in_cumulative_line == 1: + continue + + cumulative_length = LineString(cumulative_line).length + + if cumulative_length >= splitLength: + + splitLineString = LineString(cumulative_line) + split_flows = split_flows + [splitLineString] + + # Calculate channel slope + start_point = cumulative_line[0]; end_point = cumulative_line[-1] + start_elev,end_elev = [i[0] for i in rasterio.sample.sample_gen(dem,[start_point,end_point])] + slope = float(abs(start_elev - end_elev) / splitLineString.length) + if slope < slope_min: + slope = slope_min + slopes = slopes + [slope] + + last_point = end_point + + if (last_point == 
last_point_in_entire_lineString): + continue + + cumulative_line = [] + line_points = [] + + splitLineString = LineString(cumulative_line) + split_flows = split_flows + [splitLineString] + + # Calculate channel slope + start_point = cumulative_line[0]; end_point = cumulative_line[-1] + start_elev,end_elev = [i[0] for i in rasterio.sample.sample_gen(dem,[start_point,end_point])] + slope = float(abs(start_elev - end_elev) / splitLineString.length) + if slope < slope_min: + slope = slope_min + slopes = slopes + [slope] + + split_flows_gdf = gpd.GeoDataFrame({'S0' : slopes ,'geometry':split_flows}, crs=flows.crs, geometry='geometry') + split_flows_gdf['LengthKm'] = split_flows_gdf.geometry.length * toMetersConversion + if lakes is not None: + split_flows_gdf = gpd.sjoin(split_flows_gdf, lakes_buffer, how='left', op='within') #options: intersects, within, contains, crosses + split_flows_gdf = split_flows_gdf.rename(columns={"index_right": "LakeID"}).fillna(-999) + else: + split_flows_gdf['LakeID'] = -999 + + # need to figure out why so many duplicate stream segments for 04010101 FR + split_flows_gdf = split_flows_gdf.drop_duplicates() + + # Create Ids and Network Traversal Columns + addattributes = build_stream_traversal.build_stream_traversal_columns() + tResults=None + tResults = addattributes.execute(split_flows_gdf, wbd8, hydro_id) + if tResults[0] == 'OK': + split_flows_gdf = tResults[1] + else: + print ('Error: Could not add network attributes to stream segments') + + # remove single node segments + split_flows_gdf = split_flows_gdf.query("From_Node != To_Node") + + # Get all vertices + split_points = OrderedDict() + for index, segment in split_flows_gdf.iterrows(): + lineString = segment.geometry + + for point in zip(*lineString.coords.xy): + if point in split_points: + if segment.NextDownID == split_points[point]: + pass + else: + split_points[point] = segment[hydro_id] + else: + split_points[point] = segment[hydro_id] + + hydroIDs_points = [hidp for hidp in split_points.values()] + split_points = [Point(*point) for point in split_points] + + split_points_gdf = gpd.GeoDataFrame({'id': hydroIDs_points , 'geometry':split_points}, crs=flows.crs, geometry='geometry') + + print('Writing outputs ...') + + if isfile(split_flows_filename): + remove(split_flows_filename) + split_flows_gdf.to_file(split_flows_filename,driver=getDriver(split_flows_filename),index=False) + + if isfile(split_points_filename): + remove(split_points_filename) + split_points_gdf.to_file(split_points_filename,driver=getDriver(split_points_filename),index=False) + + +if __name__ == '__main__': + max_length = float(environ['max_split_distance_meters']) + slope_min = float(environ['slope_min']) + lakes_buffer_input = float(environ['lakes_buffer_dist_meters']) + + # Parse arguments. + parser = argparse.ArgumentParser(description='splitflows.py') + parser.add_argument('-f', '--flows-filename', help='flows-filename',required=True) + parser.add_argument('-d', '--dem-filename', help='dem-filename',required=True) + parser.add_argument('-s', '--split-flows-filename', help='split-flows-filename',required=True) + parser.add_argument('-p', '--split-points-filename', help='split-points-filename',required=True) + parser.add_argument('-w', '--wbd8-clp-filename', help='wbd8-clp-filename',required=True) + parser.add_argument('-l', '--lakes-filename', help='lakes-filename',required=True) + + # Extract to dictionary and assign to variables. 
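The splitting loop in split_flows() above walks each LineString's vertices, accumulating length until it passes splitLength, then emits a segment and floors its DEM-derived slope at slope_min. A minimal sketch of the same target-length splitting (illustrative only, not the PR's implementation: it interpolates break points with shapely rather than walking the existing vertices, so intermediate vertices are not preserved):

import numpy as np
from shapely.geometry import LineString

def split_evenly(line, max_length):
    # Number of pieces so that no piece exceeds max_length, mirroring
    # splitLength = line.length / ceil(line.length / max_length) above.
    n_pieces = max(1, int(np.ceil(line.length / max_length)))
    step = line.length / n_pieces
    breakpoints = [line.interpolate(i * step) for i in range(n_pieces + 1)]
    return [LineString([breakpoints[i], breakpoints[i + 1]]) for i in range(n_pieces)]

# A 250 m line with a 100 m threshold yields three pieces of ~83 m each.
pieces = split_evenly(LineString([(0, 0), (250, 0)]), 100)
assert all(piece.length <= 100 for piece in pieces)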
+ args = vars(parser.parse_args()) + + split_flows(max_length, slope_min, lakes_buffer_input, **args) diff --git a/src/time_and_tee_run_by_unit.sh b/src/time_and_tee_run_by_unit.sh new file mode 100755 index 000000000..431beef97 --- /dev/null +++ b/src/time_and_tee_run_by_unit.sh @@ -0,0 +1,10 @@ +#!/bin/bash -e + +if [[ "$mem" == "1" ]] ; then + mprof run -o $1.dat --include-children /usr/bin/time -v $srcDir/run_by_unit.sh $1 |& tee $outputRunDataDir/logs/$1.log + mprof plot -o $outputRunDataDir/logs/$1_memory $1.dat +else + /usr/bin/time -v $srcDir/run_by_unit.sh $1 |& tee $outputRunDataDir/logs/$1.log +fi + +exit ${PIPESTATUS[0]} diff --git a/lib/unique_pixel_and_allocation.py b/src/unique_pixel_and_allocation.py similarity index 94% rename from lib/unique_pixel_and_allocation.py rename to src/unique_pixel_and_allocation.py index 8f5bce600..0df89560c 100755 --- a/lib/unique_pixel_and_allocation.py +++ b/src/unique_pixel_and_allocation.py @@ -8,11 +8,14 @@ import numpy as np import argparse from r_grow_distance import r_grow_distance +from utils.shared_functions import mem_profile + +@mem_profile def stream_pixel_zones(stream_pixels, unique_stream_pixels, grass_workspace): ''' - This function will assign a unique ID for each stream pixel and writes to file. It then uses this raster to run GRASS r.grow.distance tool to create the allocation and proximity rasters required to complete the lateral thalweg conditioning. - + This function will assign a unique ID for each stream pixel and writes to file. It then uses this raster to run GRASS r.grow.distance tool to create the allocation and proximity rasters required to complete the lateral thalweg conditioning. + Parameters ---------- stream_pixels : STR @@ -34,27 +37,28 @@ def stream_pixel_zones(stream_pixels, unique_stream_pixels, grass_workspace): # Import stream pixel raster with rasterio.open(stream_pixels) as temp: streams_profile = temp.profile - streams = temp.read(1) - + streams = temp.read(1) + # Create array that matches shape of streams raster with unique values for each cell. Dataype is float64. unique_vals = np.arange(streams.size, dtype = 'float64').reshape(*streams.shape) # At streams return the unique array value otherwise return NODATA value from input streams layer. NODATA value for demDerived_streamPixels.tif is -32768. stream_pixel_values = np.where(streams == 1, unique_vals, streams_profile['nodata']) - + # Reassign dtype to be float64 (needs to be float64) streams_profile.update(dtype = 'float64') - - # Output to raster + + # Output to raster with rasterio.Env(): with rasterio.open(unique_stream_pixels, 'w', **streams_profile) as raster: raster.write(stream_pixel_values,1) - + # Compute allocation and proximity grid using r.grow.distance. Output distance grid in meters. Set datatype for output allocation (needs to be float64) and proximity grids (float32). 
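The allocation and proximity grids produced here feed adjust_thalweg_lateral.py (called from run_by_unit.sh above with a 50 m threshold): for each stream-pixel zone, the DEM minimum within that lateral distance is located and can then be used to lower the thalweg. A rough numpy/scipy sketch of that zonal-minimum step, assuming small in-memory arrays and illustrative names (the actual script also handles nodata and raster I/O):

import numpy as np
from scipy import ndimage

def lateral_zonal_minimum(dem, allocation, distance, threshold=50.0):
    # Keep only cells allocated to a stream pixel within the distance threshold.
    # Assumes stream-pixel IDs are positive integers for this sketch.
    zones = np.where(distance <= threshold, allocation, 0).astype(np.int64)
    ids = np.unique(zones[zones > 0])
    # Zonal minimum of the DEM per stream-pixel ID.
    minima = ndimage.minimum(dem, labels=zones, index=ids)
    return dict(zip(ids, minima))

The returned mapping gives the lowest lateral elevation per stream-pixel ID; the adjustment step then replaces a thalweg cell's elevation with that minimum only when it is lower than the current value.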
distance_grid, allocation_grid = r_grow_distance(unique_stream_pixels, grass_workspace, 'Float32', 'Float64') return distance_grid, allocation_grid + if __name__ == '__main__': #Parse arguments @@ -70,9 +74,6 @@ def stream_pixel_zones(stream_pixels, unique_stream_pixels, grass_workspace): stream_pixels = args['stream'] unique_stream_pixels = args['out'] grass_workspace = args['grass_workspace'] - + # Run stream_pixel_zones stream_pixel_zones(stream_pixels, unique_stream_pixels, grass_workspace) - - - diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py new file mode 100755 index 000000000..3b7b54256 --- /dev/null +++ b/src/usgs_gage_crosswalk.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 + +import geopandas as gpd +import pandas as pd +import rasterio +import argparse +import pygeos +from shapely.wkb import dumps, loads +import warnings +from utils.shared_functions import mem_profile +warnings.simplefilter("ignore") + + +''' Get elevation at adjusted USGS gages locations + + Parameters + ---------- + usgs_gages_filename : str + File name of USGS stations layer. + dem_filename : str + File name of original DEM. + input_flows_filename : str + File name of FIM streams layer. + input_catchment_filename : str + File name of FIM catchment layer. + wbd_buffer_filename : str + File name of buffered wbd. + dem_adj_filename : str + File name of thalweg adjusted DEM. + output_table_filename : str + File name of output table. +''' + + +@mem_profile +def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_adj_filename,output_table_filename,extent): + + wbd_buffer = gpd.read_file(wbd_buffer_filename) + usgs_gages = gpd.read_file(usgs_gages_filename, mask=wbd_buffer) + dem_m = rasterio.open(dem_filename,'r') + input_flows = gpd.read_file(input_flows_filename) + input_catchment = gpd.read_file(input_catchment_filename) + dem_adj = rasterio.open(dem_adj_filename,'r') + + #MS extent use gages that are mainstem + if extent == "MS": + usgs_gages = usgs_gages.query('curve == "yes" & mainstem == "yes"') + #FR extent use gages that are not mainstem + if extent == "FR": + usgs_gages = usgs_gages.query('curve == "yes" & mainstem == "no"') + + if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) + + # Identify closest HydroID + closest_catchment = gpd.sjoin(usgs_gages, input_catchment, how='left', op='within').reset_index(drop=True) + closest_hydro_id = closest_catchment.filter(items=['location_id','HydroID','min_thal_elev','med_thal_elev','max_thal_elev', 'order_']) + closest_hydro_id = closest_hydro_id.dropna() + + # Get USGS gages that are within catchment boundaries + usgs_gages = usgs_gages.loc[usgs_gages.location_id.isin(list(closest_hydro_id.location_id))] + + columns = ['location_id','HydroID','dem_elevation','dem_adj_elevation','min_thal_elev', 'med_thal_elev','max_thal_elev','str_order'] + gage_data = [] + + # Move USGS gage to stream + for index, gage in usgs_gages.iterrows(): + + # Get stream attributes + hydro_id = closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].HydroID.item() + str_order = str(int(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].order_.item())) + min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].min_thal_elev.item(),2) + med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].med_thal_elev.item(),2) + max_thal_elev = 
round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].max_thal_elev.item(),2) + + # Convert headwater point geometries to WKB representation + wkb_gages = dumps(gage.geometry) + + # Create pygeos headwater point geometries from WKB representation + gage_bin_geom = pygeos.io.from_wkb(wkb_gages) + + # Closest segment to headwater + closest_stream = input_flows.loc[input_flows.HydroID==hydro_id] + wkb_closest_stream = dumps(closest_stream.geometry.item()) + stream_bin_geom = pygeos.io.from_wkb(wkb_closest_stream) + + # Linear reference headwater to closest stream segment + gage_distance_to_line = pygeos.linear.line_locate_point(stream_bin_geom, gage_bin_geom) + referenced_gage = pygeos.linear.line_interpolate_point(stream_bin_geom, gage_distance_to_line) + + # Convert geometries to wkb representation + bin_referenced_gage = pygeos.io.to_wkb(referenced_gage) + + # Convert to shapely geometries + shply_referenced_gage = loads(bin_referenced_gage) + + # Sample rasters at adjusted gage + dem_m_elev = round(list(rasterio.sample.sample_gen(dem_m,shply_referenced_gage.coords))[0].item(),2) + dem_adj_elev = round(list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item(),2) + + # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table + site_elevations = [str(gage.location_id), str(hydro_id), dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str(str_order)] + gage_data.append(site_elevations) + + elev_table = pd.DataFrame(gage_data, columns=columns) + + if not elev_table.empty: + elev_table.to_csv(output_table_filename,index=False) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Crosswalk USGS sites to HydroID and get elevations') + parser.add_argument('-gages','--usgs-gages-filename', help='USGS gages', required=True) + parser.add_argument('-dem','--dem-filename',help='DEM',required=True) + parser.add_argument('-flows','--input-flows-filename', help='DEM derived streams', required=True) + parser.add_argument('-cat','--input-catchment-filename', help='DEM derived catchments', required=True) + parser.add_argument('-wbd','--wbd-buffer-filename', help='WBD buffer', required=True) + parser.add_argument('-dem_adj','--dem-adj-filename', help='Thalweg adjusted DEM', required=True) + parser.add_argument('-outtable','--output-table-filename', help='Table to append data', required=True) + parser.add_argument('-e', '--extent', help="extent configuration entered by user when running fim_run.sh", required = True) + + args = vars(parser.parse_args()) + + usgs_gages_filename = args['usgs_gages_filename'] + dem_filename = args['dem_filename'] + input_flows_filename = args['input_flows_filename'] + input_catchment_filename = args['input_catchment_filename'] + wbd_buffer_filename = args['wbd_buffer_filename'] + dem_adj_filename = args['dem_adj_filename'] + output_table_filename = args['output_table_filename'] + extent = args['extent'] + + crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename, dem_adj_filename,output_table_filename, extent) diff --git a/tests/__init__.py b/src/utils/__init__.py similarity index 100% rename from tests/__init__.py rename to src/utils/__init__.py diff --git a/lib/utils/archive_cleanup.py b/src/utils/archive_cleanup.py similarity index 100% rename from lib/utils/archive_cleanup.py rename to src/utils/archive_cleanup.py diff --git a/src/utils/reproject_dem.py b/src/utils/reproject_dem.py new file mode 100755 index 
000000000..dba8f65de --- /dev/null +++ b/src/utils/reproject_dem.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +import os +from osgeo import gdal +import sys +sys.path.append('/foss_fim/src') +from utils.shared_variables import PREP_PROJECTION_CM +import shutil +from multiprocessing import Pool +import argparse + + +def reproject_dem(args): + + raster_dir = args[0] + elev_cm = args[1] + elev_cm_proj = args[2] + reprojection = args[3] + + if os.path.exists(elev_cm_proj): + os.remove(elev_cm_proj) + + shutil.copy(elev_cm, elev_cm_proj) + + print(f"Reprojecting {elev_cm_proj}") + gdal.Warp(elev_cm_proj,elev_cm_proj,dstSRS=reprojection) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Burn in NLD elevations') + parser.add_argument('-dem_dir','--dem-dir', help='DEM filename', required=True,type=str) + parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) + + args = vars(parser.parse_args()) + + dem_dir = args['dem_dir'] + number_of_jobs = args['number_of_jobs'] + + reproject_procs_list = [] + + for huc in os.listdir(dem_dir): + raster_dir = os.path.join(dem_dir,huc) + elev_cm = os.path.join(raster_dir, 'elev_cm.tif') + elev_cm_proj = os.path.join(raster_dir, 'elev_cm_proj.tif') + reproject_procs_list.append([raster_dir, elev_cm, elev_cm_proj, PREP_PROJECTION_CM]) + + # Multiprocess reprojection + with Pool(processes=number_of_jobs) as pool: + pool.map(reproject_dem, reproject_procs_list) diff --git a/src/utils/shared_functions.py b/src/utils/shared_functions.py new file mode 100644 index 000000000..b01533c7d --- /dev/null +++ b/src/utils/shared_functions.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python3 + +import os +from os.path import splitext +import rasterio +import numpy as np +from rasterio.warp import calculate_default_transform, reproject, Resampling +from pyproj.crs import CRS + +def getDriver(fileName): + + driverDictionary = {'.gpkg' : 'GPKG','.geojson' : 'GeoJSON','.shp' : 'ESRI Shapefile'} + driver = driverDictionary[splitext(fileName)[1]] + + return(driver) + +def pull_file(url, full_pulled_filepath): + """ + This helper function pulls a file and saves it to a specified path. + + Args: + url (str): The full URL to the file to download. + full_pulled_filepath (str): The full system path where the downloaded file will be saved. + """ + import urllib.request + + print("Pulling " + url) + urllib.request.urlretrieve(url, full_pulled_filepath) + + +def delete_file(file_path): + """ + This helper function deletes a file. + + Args: + file_path (str): System path to a file to be deleted. + """ + + try: + os.remove(file_path) + except FileNotFoundError: + pass + + +def run_system_command(args): + """ + This helper function takes a system command and runs it. This function is designed for use + in multiprocessing. + + Args: + args (list): A single-item list, the first and only item being a system command string. + """ + + # Parse system command. + command = args[0] + + # Run system command. + os.system(command) + + +def subset_wbd_gpkg(wbd_gpkg, multilayer_wbd_geopackage): + + import geopandas as gp + from utils.shared_variables import CONUS_STATE_LIST, PREP_PROJECTION + + print("Subsetting " + wbd_gpkg + "...") + # Read geopackage into dataframe. + wbd = gp.read_file(wbd_gpkg) + gdf = gp.GeoDataFrame(wbd) + + for index, row in gdf.iterrows(): + state = row["STATES"] + if state != None: # Some polygons are empty in the STATES field. + keep_flag = False # Default to Fault, i.e. 
to delete the polygon. + if state in CONUS_STATE_LIST: + keep_flag = True + # Only split if multiple states present. More efficient this way. + elif len(state) > 2: + for wbd_state in state.split(","): # Some polygons have multiple states, separated by a comma. + if wbd_state in CONUS_STATE_LIST: # Check each polygon to make sure it's state abbrev name is allowed. + keep_flag = True + break + if not keep_flag: + gdf.drop(index, inplace=True) # Delete from dataframe. + + # Overwrite geopackage. + layer_name = os.path.split(wbd_gpkg)[1].strip('.gpkg') + gdf.crs = PREP_PROJECTION + gdf.to_file(multilayer_wbd_geopackage, layer=layer_name,driver='GPKG',index=False) + + +def update_raster_profile(args): + + elev_cm_filename = args[0] + elev_m_filename = args[1] + projection = args[2] + nodata_val = args[3] + blocksize = args[4] + keep_intermediate = args[5] + + if isinstance(blocksize, int): + pass + elif isinstance(blocksize,str): + blocksize = int(blocksize) + elif isinstance(blocksize,float): + blocksize = int(blocksize) + else: + raise TypeError("Pass integer for blocksize") + + assert elev_cm_filename.endswith('.tif'), "input raster needs to be a tif" + + # Update nodata value and convert from cm to meters + dem_cm = rasterio.open(elev_cm_filename) + + no_data = dem_cm.nodata + data = dem_cm.read(1) + + dem_m = np.where(data == int(no_data), nodata_val, (data/100).astype(rasterio.float32)) + + del data + + dem_m_profile = dem_cm.profile.copy() + + dem_m_profile.update(driver='GTiff',tiled=True,nodata=nodata_val, + blockxsize=blocksize, blockysize=blocksize, + dtype='float32',crs=projection,compress='lzw',interleave='band') + + with rasterio.open(elev_m_filename, "w", **dem_m_profile, BIGTIFF='YES') as dest: + dest.write(dem_m, indexes = 1) + + if keep_intermediate == False: + os.remove(elev_cm_filename) + + del dem_m + dem_cm.close() + + +''' +This function isn't currently used but is the preferred method for +reprojecting elevation grids. + +Several USGS elev_cm.tifs have the crs value in their profile stored as the string "CRS.from_epsg(26904)" +instead of the actual output of that command. + +Rasterio fails to properly read the crs but using gdal retrieves the correct projection. +Until this issue is resolved use the reproject_dem function in reproject_dem.py instead. 
+reproject_dem is not stored in the shared_functions.py because rasterio and +gdal bindings are not entirely compatible: https://rasterio.readthedocs.io/en/latest/topics/switch.html + +''' + +def reproject_raster(input_raster_name,reprojection,blocksize=None,reprojected_raster_name=None): + + if blocksize is not None: + if isinstance(blocksize, int): + pass + elif isinstance(blocksize,str): + blocksize = int(blocksize) + elif isinstance(blocksize,float): + blocksize = int(blocksize) + else: + raise TypeError("Pass integer for blocksize") + else: + blocksize = 256 + + assert input_raster_name.endswith('.tif'), "input raster needs to be a tif" + + reprojection = rasterio.crs.CRS.from_string(reprojection) + + with rasterio.open(input_raster_name) as src: + + # Check projection + if src.crs.to_string() != reprojection: + if src.crs.to_string().startswith('EPSG'): + epsg = src.crs.to_epsg() + proj_crs = CRS.from_epsg(epsg) + rio_crs = rasterio.crs.CRS.from_user_input(proj_crs).to_string() + else: + rio_crs = src.crs.to_string() + + print(f"{input_raster_name} not projected") + print(f"Reprojecting from {rio_crs} to {reprojection}") + + transform, width, height = calculate_default_transform( + src.crs, reprojection, src.width, src.height, *src.bounds) + kwargs = src.meta.copy() + kwargs.update({ + 'crs': reprojection, + 'transform': transform, + 'width': width, + 'height': height, + 'compress': 'lzw' + }) + + if reprojected_raster_name is None: + reprojected_raster_name = input_raster_name + + assert reprojected_raster_name.endswith('.tif'), "output raster needs to be a tif" + + with rasterio.open(reprojected_raster_name, 'w', **kwargs, tiled=True, blockxsize=blocksize, blockysize=blocksize, BIGTIFF='YES') as dst: + reproject( + source=rasterio.band(src, 1), + destination=rasterio.band(dst, 1), + src_transform=src.transform, + src_crs=rio_crs, + dst_transform=transform, + dst_crs=reprojection.to_string(), + resampling=Resampling.nearest) + del dst + del src + + +def mem_profile(func): + def wrapper(*args, **kwargs): + if (os.environ.get('mem') == "1"): + profile(func)(*args, **kwargs) + else: + func(*args, **kwargs) + return wrapper diff --git a/src/utils/shared_variables.py b/src/utils/shared_variables.py new file mode 100644 index 000000000..44bc27f8b --- /dev/null +++ b/src/utils/shared_variables.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +import os + +# Projections. 
+#PREP_PROJECTION = "+proj=aea +datum=NAD83 +x_0=0.0 +y_0=0.0 +lon_0=96dW +lat_0=23dN +lat_1=29d30'N +lat_2=45d30'N +towgs84=-0.9956000824677655,1.901299877314078,0.5215002840524426,0.02591500053005733,0.009425998542707753,0.01159900118427752,-0.00062000005129903 +no_defs +units=m" +PREP_PROJECTION_CM = 'PROJCS["USA_Contiguous_Albers_Equal_Area_Conic_USGS_version",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Albers"],PARAMETER["false_easting",0.0],PARAMETER["false_northing",0.0],PARAMETER["central_meridian",-96.0],PARAMETER["standard_parallel_1",29.5],PARAMETER["standard_parallel_2",45.5],PARAMETER["latitude_of_origin",23.0],UNIT["Meter",1.0],VERTCS["NAVD_1988",VDATUM["North_American_Vertical_Datum_1988"],PARAMETER["Vertical_Shift",0.0],PARAMETER["Direction",1.0],UNIT["Centimeter",0.01]]]' +PREP_PROJECTION = 'PROJCS["USA_Contiguous_Albers_Equal_Area_Conic_USGS_version",GEOGCS["NAD83",DATUM["North_American_Datum_1983",SPHEROID["GRS 1980",6378137,298.2572221010042,AUTHORITY["EPSG","7019"]],AUTHORITY["EPSG","6269"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4269"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",29.5],PARAMETER["standard_parallel_2",45.5],PARAMETER["latitude_of_center",23],PARAMETER["longitude_of_center",-96],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]]]' +VIZ_PROJECTION ='PROJCS["WGS_1984_Web_Mercator_Auxiliary_Sphere",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mercator_Auxiliary_Sphere"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Standard_Parallel_1",0.0],PARAMETER["Auxiliary_Sphere_Type",0.0],UNIT["Meter",1.0]]' +# -- Data URLs-- # +NHD_URL_PARENT = r'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/' +NWM_HYDROFABRIC_URL = r'http://www.nohrsc.noaa.gov/pub/staff/keicher/NWM_live/web/data_tools/NWM_channel_hydrofabric.tar.gz' # Temporary +WBD_NATIONAL_URL = r'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/National/GDB/WBD_National_GDB.zip' +WBD_HU2_URL_PARENT = r'http://prd-tnm.s3-website-us-west-2.amazonaws.com/?prefix=StagedProducts/Hydrography/WBD/HU2/GDB' + +# -- Prefixes and Suffixes -- # +NHD_URL_PREFIX = 'NHDPLUS_H_' +NHD_RASTER_URL_SUFFIX = '_HU4_RASTER.7z' +NHD_VECTOR_URL_SUFFIX = '_HU4_GDB.zip' +NHD_RASTER_EXTRACTION_PREFIX = 'HRNHDPlusRasters' +NHD_RASTER_EXTRACTION_SUFFIX = 'elev_cm.tif' + +NHD_VECTOR_EXTRACTION_PREFIX = 'NHDPLUS_H_' +NHD_VECTOR_EXTRACTION_SUFFIX = '_HU4_GDB.zip' + +# -- Field Names -- # +FIM_ID = 'fimid' + +# -- Other -- # +CONUS_STATE_LIST = {"AL", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", + "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", + "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", + "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "PR", "RI", "SC", + "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"} + +OVERWRITE_WBD = 'OVERWRITE_WBD' +OVERWRITE_NHD = 'OVERWRITE_NHD' +OVERWRITE_ALL = 'OVERWRITE_ALL' + +## Input Paths and Directories +# Directories +os.environ['src_dir'] = '/foss_fim/src' +os.environ['input_dir'] = 'data/inputs' + +os.environ['nhdplus_rasters_dir'] = os.path.join(os.environ.get('input_dir'),'nhdplus_rasters') 
+os.environ['nhdplus_vectors_dir'] = os.path.join(os.environ.get('input_dir'),'nhdplus_vectors') +os.environ['nwm_dir'] = os.path.join(os.environ.get('input_dir'),'nwm_hydrofabric') +os.environ['wbd_dir'] = os.path.join(os.environ.get('input_dir'),'wbd') +os.environ['ahps_dir'] = os.path.join(os.environ.get('input_dir'),'ahps_sites') +os.environ['nhdplus_aggregate_dir'] = os.path.join(os.environ.get('input_dir'),'nhdplus_vectors_aggregate') + +# File Paths +os.environ['wbd_filename'] = os.path.join(os.environ.get('wbd_dir'),'WBD_National.gpkg') +os.environ['nwm_streams_orig_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_flows_original.gpkg') +os.environ['nwm_streams_all_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_flows.gpkg') +os.environ['nwm_headwaters_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_headwaters.gpkg') +os.environ['nwm_huc4_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_huc4_intersections.gpkg') +os.environ['nhd_huc8_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nhd_huc8_intersections.gpkg') +os.environ['ahps_filename'] = os.path.join(os.environ.get('ahps_dir'),'nws_lid.gpkg') +os.environ['agg_nhd_headwaters_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_headwaters_adj.gpkg') +os.environ['agg_nhd_streams_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_streams_adj.gpkg') +os.environ['nwm_catchments_orig_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_catchments_original.gpkg') +os.environ['nwm_catchments_all_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_catchments.gpkg') diff --git a/src/vary_mannings_n_composite.py b/src/vary_mannings_n_composite.py new file mode 100755 index 000000000..0490cc368 --- /dev/null +++ b/src/vary_mannings_n_composite.py @@ -0,0 +1,256 @@ +#!/usr/bin/env python3 + +import os +import sys +import pandas as pd +import numpy as np +import argparse +import matplotlib.pyplot as plt +import seaborn as sns +from functools import reduce +from multiprocessing import Pool +from os.path import isfile, join, dirname, isdir +import shutil +import warnings +from pathlib import Path +import datetime as dt +sns.set_theme(style="whitegrid") +warnings.simplefilter(action='ignore', category=FutureWarning) + +""" + Vary the Manning's n values for in-channel vs. floodplain + + Parameters + ---------- + fim_dir : str + Directory containing FIM output folders. + channel_ratio_src_column : str + SRC attribute containing the channel vs. floodplain attribute + mann_n_table : str + Path to a csv file containing Manning's n values by feature_id + file_suffix : str + Suffix to append to the output log file + number_of_jobs : str + Number of jobs. 
+ src_plot_option : str + Optional (True or False): use this flag to crate src plots for all hydroids +""" + +def variable_mannings_calc(args): + + in_src_bankfull_filename = args[0] + channel_ratio_src_column = args[1] + df_mann = args[2] + huc = args[3] + out_src_vmann_filename = args[4] + htable_filename = args[5] + src_plot_option = args[6] + huc_output_dir = args[7] + viz_clean_flag = args[8] + + ## Read the src_full_crosswalked.csv + log_text = 'Calculating: ' + str(huc) + '\n' + df_src = pd.read_csv(in_src_bankfull_filename,dtype={'feature_id': 'int64'}) + + ## Check that the channel ratio column the user specified exists in the def + if channel_ratio_src_column not in df_src.columns: + log_text += 'WARNING --> ' + str(huc) + in_src_bankfull_filename + ' does not contain the specified channel ratio column: ' + channel_ratio_src_column + '\n' + else: + ## Raname the current discharge & ManningN columns + df_src = df_src.rename(columns={'Discharge (m3s-1)':'default_Discharge (m3s-1)','ManningN':'default_ManningN'}) + ## Merge (crosswalk) the df of Manning's n with the SRC df (using the channel/fplain delination in the channel_ratio_src_column) + df_src = df_src.merge(df_mann, how='left', on='feature_id') + check_null = df_src['channel_n'].isnull().sum() + df_src['overbank_n'].isnull().sum() + if check_null > 0: + log_text += str(huc) + ' --> ' + 'Null feature_ids found in crosswalk btw roughness dataframe and src dataframe' + ' --> missing entries= ' + str(check_null/84) + '\n' + + ## Calculate composite Manning's n using the channel geometry ratio attribute given by user (e.g. chann_hradius_ratio or chann_vol_ratio) + df_src['comp_ManningN'] = (df_src[channel_ratio_src_column]*df_src['channel_n']) + ((1.0 - df_src[channel_ratio_src_column])*df_src['overbank_n']) + #print('Done calculating composite Manning n (' + channel_ratio_src_column + '): ' + str(huc)) + + ## Check if there are any missing data in the composite ManningN column + check_null_comp = df_src['comp_ManningN'].isnull().sum() + if check_null_comp > 0: + log_text += str(huc) + ' --> ' + 'Missing values in the comp_ManningN calculation' + ' --> missing entries= ' + str(check_null_comp/84) + '\n' + df_src['vmann_on'] = np.where(df_src['comp_ManningN'].isnull(), False, True) # create field to identify where vmann is applied (True=yes; False=no) + + ## Define the channel geometry variable names to use from the src + hydr_radius = 'HydraulicRadius (m)' + wet_area = 'WetArea (m2)' + + ## Calculate Q using Manning's equation + #df_src.rename(columns={'Discharge (m3s-1)'}, inplace=True) # rename the previous Discharge column + df_src['Discharge (m3s-1)_varMann'] = df_src[wet_area]* \ + pow(df_src[hydr_radius],2.0/3)* \ + pow(df_src['SLOPE'],0.5)/df_src['comp_ManningN'] + + ## Set Q values to 0 and -999 for specified criteria + df_src['Discharge (m3s-1)_varMann'].mask(df_src['Stage'] == 0,0,inplace=True) + if 'Thalweg_burn_elev' in df_src: + df_src['Discharge (m3s-1)_varMann'].mask(df_src['Stage'] == df_src['Thalweg_burn_elev'],0,inplace=True) + df_src['Discharge (m3s-1)_varMann'].mask(df_src['Stage'] < df_src['Thalweg_burn_elev'],-999,inplace=True) + + ## Use the default discharge column when vmann is not being applied + df_src['Discharge (m3s-1)_varMann'] = np.where(df_src['vmann_on']==False, df_src['default_Discharge (m3s-1)'], df_src['Discharge (m3s-1)_varMann']) # reset the discharge value back to the original if vmann=false + df_src['comp_ManningN'] = np.where(df_src['vmann_on']==False, df_src['default_ManningN'], 
df_src['comp_ManningN']) # reset the ManningN value back to the original if vmann=false + + ## Output new SRC with bankfull column + df_src.to_csv(out_src_vmann_filename,index=False) + + ## Output new hydroTable with updated discharge and ManningN column + df_src_trim = df_src[['HydroID','Stage','vmann_on',channel_ratio_src_column,'Discharge (m3s-1)_varMann','comp_ManningN']] + df_src_trim = df_src_trim.rename(columns={'Stage':'stage','Discharge (m3s-1)_varMann': 'discharge_cms','comp_ManningN':'ManningN'}) + df_htable = pd.read_csv(htable_filename,dtype={'HUC': str}) + df_htable.rename(columns={'ManningN':'orig_ManningN'},inplace=True) + df_htable.drop(['vmann_on'], axis=1, inplace=True) # drop the default "vmann_on" variable from add_crosswalk.py + if not set(['orig_discharge_cms']).issubset(df_htable.columns): + df_htable.rename(columns={'discharge_cms':'orig_discharge_cms'},inplace=True) + else: + df_htable.drop(['discharge_cms'], axis=1, inplace=True) # drop the previously modified discharge column to be replaced with updated version + df_htable = df_htable.merge(df_src_trim, how='left', left_on=['HydroID','stage'], right_on=['HydroID','stage']) + + # Delete intermediate CSVs outputs. Todo delete this block later. + htable_parent_dir = os.path.split(htable_filename)[0] + # List all CSVs. + file_list = os.listdir(htable_parent_dir) + for f in file_list: + if viz_clean_flag == 1: # if using the viz flag then delete all intermediate csv files + if '.csv' in f: + if f != 'hydroTable.csv': + os.remove(os.path.join(htable_parent_dir, f)) + else: + keep_files = ['usgs_elev_table.csv', 'src_base.csv', 'small_segments.csv'] + if '.csv' in f: + if f not in keep_files: + os.remove(os.path.join(htable_parent_dir, f)) + + df_htable.to_csv(htable_filename,index=False) + + log_text += 'Completed: ' + str(huc) + + ## plot rating curves + if src_plot_option == 'True': + if isdir(huc_output_dir) == False: + os.mkdir(huc_output_dir) + generate_src_plot(df_src, huc_output_dir) + + return(log_text) + +def generate_src_plot(df_src, plt_out_dir): + + ## create list of unique hydroids + hydroids = df_src.HydroID.unique().tolist() + + ## plot each hydroid SRC in the huc + for hydroid in hydroids: + print("Creating SRC plot: " + str(hydroid)) + plot_df = df_src.loc[df_src['HydroID'] == hydroid] + + f, ax = plt.subplots(figsize=(6.5, 6.5)) + ax.set_title(str(hydroid)) + sns.despine(f, left=True, bottom=True) + sns.scatterplot(x='Discharge (m3s-1)', y='Stage', data=plot_df, label="Orig SRC", ax=ax, color='blue') + sns.scatterplot(x='Discharge (m3s-1)_varMann', y='Stage', data=plot_df, label="SRC w/ vMann", ax=ax, color='orange') + sns.lineplot(x='Discharge (m3s-1)', y='Stage_1_5', data=plot_df, color='green', ax=ax) + plt.fill_between(plot_df['Discharge (m3s-1)'], plot_df['Stage_1_5'],alpha=0.5) + plt.text(plot_df['Discharge (m3s-1)'].median(), plot_df['Stage_1_5'].median(), "NWM 1.5yr: " + str(plot_df['Stage_1_5'].median())) + ax.legend() + plt.savefig(plt_out_dir + os.sep + str(hydroid) + '_vmann.png',dpi=175, bbox_inches='tight') + plt.close() + +# for hydroid in hydroids: +# print("Creating SRC plot: " + str(hydroid)) +# plot_df = df_src.loc[df_src['HydroID'] == hydroid] +# +# f, ax = plt.subplots(figsize=(6.5, 6.5)) +# ax.set_title(str(hydroid)) +# sns.despine(f, left=True, bottom=True) +# sns.scatterplot(x='comp_ManningN', y='Stage', data=plot_df, label="Orig SRC", ax=ax, color='blue') +# #sns.scatterplot(x='Discharge (m3s-1)_varMann', y='Stage', data=plot_df, label="SRC w/ vMann", ax=ax, color='orange') 
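variable_mannings_calc() above blends channel and overbank roughness using the channel-geometry ratio column, then re-solves Manning's equation for discharge with the composite n. A small worked sketch of those two formulas with scalar placeholder values (numbers are illustrative, not calibrated):

# Composite roughness: n_comp = ratio * n_channel + (1 - ratio) * n_overbank
channel_ratio, channel_n, overbank_n = 0.6, 0.06, 0.12
comp_n = channel_ratio * channel_n + (1.0 - channel_ratio) * overbank_n   # 0.084

# Manning's equation as used for the SRC: Q = A * R**(2/3) * S**0.5 / n
wet_area, hydraulic_radius, slope = 45.0, 1.8, 0.002
discharge_cms = wet_area * hydraulic_radius ** (2.0 / 3.0) * slope ** 0.5 / comp_n   # ~35.5 m3/s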
+# sns.lineplot(x='comp_ManningN', y='Stage_1_5', data=plot_df, color='green', ax=ax) +# plt.fill_between(plot_df['comp_ManningN'], plot_df['Stage_1_5'],alpha=0.5) +# plt.text(plot_df['comp_ManningN'].median(), plot_df['Stage_1_5'].median(), "NWM 1.5yr: " + str(plot_df['Stage_1_5'].median())) +# ax.legend() +# plt.savefig(plt_out_dir + os.sep + str(hydroid) + '.png',dpi=175, bbox_inches='tight') +# plt.close() + +def multi_process(variable_mannings_calc, procs_list): + ## Initiate multiprocessing + print(f"Applying variable Manning's n to SRC calcs for {len(procs_list)} hucs using {number_of_jobs} jobs") + with Pool(processes=number_of_jobs) as pool: + map_output = pool.map(variable_mannings_calc, procs_list) + log_file.writelines(["%s\n" % item for item in map_output]) + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description="Vary the Manning's n values for in-channel vs. floodplain (recalculate Manning's eq for Discharge)") + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) + parser.add_argument('-bc','--channel-ratio-src-column',help='SRC attribute containing the channel vs. overbank geometry ratio (for composite calc)',required=False,type=str,default='chann_hradius_ratio') + parser.add_argument('-mann','--mann-n-table',help="Path to a csv file containing Manning's n values by featureid",required=True,type=str) + parser.add_argument('-suff','--output-suffix',help="Suffix to append to the output log file (e.g. '_global_06_011')",required=True,type=str) + parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) + parser.add_argument('-plots','--src-plot-option',help='Optional (True or False): use this flag to create src plots for all hydroids. WARNING - long runtime',required=False,default='False',type=str) + parser.add_argument('-viz_clean','--viz-clean',help='Optional (Viz flag): pass the viz flag (0 or 1) to delete intermediate csv files',required=False,default=0,type=int) + + args = vars(parser.parse_args()) + + fim_dir = args['fim_dir'] + channel_ratio_src_column = args['channel_ratio_src_column'] + mann_n_table = args['mann_n_table'] + output_suffix = args['output_suffix'] + number_of_jobs = args['number_of_jobs'] + src_plot_option = args['src_plot_option'] + viz_clean_flag = args['viz_clean'] + procs_list = [] + + print('Writing progress to log file here: ' + str(join(fim_dir,'log_composite_n' + output_suffix + '.log'))) + print('This may take a few minutes...') + ## Create a time var to log run time + begin_time = dt.datetime.now() + + ## Check that the bankfull flow filepath exists and read to dataframe + if not isfile(mann_n_table): + print('!!! Can not find the input roughness/feature_id file: ' + str(mann_n_table)) + else: + ## Read the Manning's n csv (ensure that it contains feature_id, channel mannings, floodplain mannings) + print('Importing the Manning roughness data file: ' + mann_n_table) + df_mann = pd.read_csv(mann_n_table,dtype={'feature_id': 'int64'}) + if 'channel_n' not in df_mann.columns or 'overbank_n' not in df_mann.columns or 'feature_id' not in df_mann.columns: + print('Missing required data column ("feature_id","channel_n", and/or "overbank_n")!!! 
--> ' + df_mann) + else: + print('Running the variable_mannings_calc function...') + + ## Loop through hucs in the fim_dir and create list of variables to feed to multiprocessing + huc_list = os.listdir(fim_dir) + skip_hucs_log = "" + for huc in huc_list: + if huc != 'logs' and huc[-3:] != 'log' and huc[-4:] != '.csv': + in_src_bankfull_filename = join(fim_dir,huc,'src_full_crosswalked_bankfull.csv') + out_src_vmann_filename = join(fim_dir,huc,'src_full_crosswalked_vmann.csv') + htable_filename = join(fim_dir,huc,'hydroTable.csv') + huc_plot_output_dir = join(fim_dir,huc,'src_plots') + + if isfile(in_src_bankfull_filename): + print(str(huc)) + procs_list.append([in_src_bankfull_filename, channel_ratio_src_column, df_mann, huc, out_src_vmann_filename, htable_filename, src_plot_option, huc_plot_output_dir,viz_clean_flag]) + else: + print(str(huc) + '\nWARNING --> can not find the src_full_crosswalked_bankfull.csv in the fim output dir: ' + str(join(fim_dir,huc)) + ' - skipping this HUC!!!\n') + + ## initiate log file + print(f"Applying variable Manning's n to SRC calcs for {len(procs_list)} hucs using {number_of_jobs} jobs") + sys.__stdout__ = sys.stdout + log_file = open(join(fim_dir,'logs','log_composite_n' + output_suffix + '.log'),"w") + sys.stdout = log_file + log_file.write('START TIME: ' + str(begin_time) + '\n') + log_file.write('#########################################################\n\n') + + ## Pass huc procs_list to multiprocessing function + multi_process(variable_mannings_calc, procs_list) + + ## Record run time and close log file + end_time = dt.datetime.now() + log_file.write('END TIME: ' + str(end_time) + '\n') + tot_run_time = end_time - begin_time + log_file.write('TOTAL RUN TIME: ' + str(tot_run_time)) + sys.stdout = sys.__stdout__ + log_file.close() diff --git a/tests/aggregate_metrics.py b/tests/aggregate_metrics.py deleted file mode 100644 index 98134da59..000000000 --- a/tests/aggregate_metrics.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python3 - -import json -import os -import csv - -import argparse - -TEST_CASES_DIR = r'/data/test_cases/' -# TEMP = r'/data/temp' - -# Search through all previous_versions in test_cases -from utils.shared_functions import compute_stats_from_contingency_table - -def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfolder=""): - - # Read hucs into list. - if hucs != "": - huc_list = [line.rstrip('\n') for line in open(hucs)] - - else: - huc_list = None - - if config == "DEV": - config_version = "development_versions" - elif config == "PREV": - config_version = "previous_versions" - - # Make directory to store output aggregates. 
- if special_string != "": - special_string = "_" + special_string - aggregate_output_dir = os.path.join(outfolder, 'aggregate_metrics', branch + '_aggregate_metrics' + special_string) - if not os.path.exists(aggregate_output_dir): - os.makedirs(aggregate_output_dir) - - test_cases_dir_list = os.listdir(TEST_CASES_DIR) - - for magnitude in ['100yr', '500yr', 'action', 'minor', 'moderate', 'major']: - huc_path_list = [['huc', 'path']] - true_positives, true_negatives, false_positives, false_negatives, cell_area, masked_count = 0, 0, 0, 0, 0, 0 - - for test_case in test_cases_dir_list: - - if test_case not in ['other', 'validation_data_ble', 'validation_data_legacy', 'validation_data_ahps']: - branch_results_dir = os.path.join(TEST_CASES_DIR, test_case, 'performance_archive', config_version, branch) - - huc = test_case.split('_')[0] - # Check that the huc is in the list of hucs to aggregate. - if huc_list != None and huc not in huc_list: - continue - - stats_json_path = os.path.join(branch_results_dir, magnitude, 'total_area_stats.json') - - # If there is a stats json for the test case and branch name, use it when aggregating stats. - if os.path.exists(stats_json_path): - json_dict = json.load(open(stats_json_path)) - - true_positives += json_dict['true_positives_count'] - true_negatives += json_dict['true_negatives_count'] - false_positives += json_dict['false_positives_count'] - false_negatives += json_dict['false_negatives_count'] - masked_count += json_dict['masked_count'] - - cell_area = json_dict['cell_area_m2'] - - huc_path_list.append([huc, stats_json_path]) - - - if cell_area == 0: - continue - - # Pass all sums to shared function to calculate metrics. - stats_dict = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area=cell_area, masked_count=masked_count) - - list_to_write = [['metric', 'value']] # Initialize header. - - for stat in stats_dict: - list_to_write.append([stat, stats_dict[stat]]) - - # Map path to output directory for aggregate metrics. - output_file = os.path.join(aggregate_output_dir, branch + '_aggregate_metrics_' + magnitude + special_string + '.csv') - - if cell_area != 0: - with open(output_file, 'w', newline='') as csvfile: - csv_writer = csv.writer(csvfile) - csv_writer.writerows(list_to_write) - csv_writer.writerow([]) - csv_writer.writerows(huc_path_list) - - print() - print("Finished aggregating for the '" + magnitude + "' magnitude. Aggregated metrics over " + str(len(huc_path_list)-1) + " test cases.") - print() - print("Results are at: " + output_file) - print() - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Aggregates a metric or metrics for multiple HUC8s.') - parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? 
Options: "DEV" or "PREV"',required=False) - parser.add_argument('-b','--branch',help='Name of branch to check all test_cases for and to aggregate.',required=True) - parser.add_argument('-u','--hucs',help='HUC8s to restrict the aggregation.',required=False, default="") - parser.add_argument('-s','--special_string',help='Special string to add to outputs.',required=False, default="") - parser.add_argument('-f','--outfolder',help='output folder',required=True,type=str) - - args = vars(parser.parse_args()) - - aggregate_metrics(**args) diff --git a/tests/all_ble_stats_comparison.py b/tests/all_ble_stats_comparison.py deleted file mode 100755 index c4683c4e1..000000000 --- a/tests/all_ble_stats_comparison.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 - -import os -import pandas as pd -import argparse - - -def subset_vector_layers(huclist, branch, current_dev, outfolder): - - test_cases= r'data/test_cases' - ble_sitelist = [str(line.rstrip('\n')) for line in open(huclist)] - stat_list = ['fim_1_0_0', 'fim_2_3_3',str(current_dev), 'new_feature','eval'] - eval_all = pd.DataFrame([]) - - # stat_list = stat_list + [branch] - for site in ble_sitelist: - eval_100_path=os.path.join(test_cases,str(site) + '_ble', 'performance_archive', 'development_versions', branch, '100yr','stats_summary.csv') - eval_500_path=os.path.join(test_cases,str(site) + '_ble', 'performance_archive', 'development_versions', branch, '500yr','stats_summary.csv') - - if os.path.exists(eval_100_path) and os.path.exists(eval_500_path): - eval_100 = pd.read_csv(eval_100_path,index_col=0) - eval_100['eval'] = '100yr' - - eval_500 = pd.read_csv(eval_500_path,index_col=0) - eval_500['eval'] = '500yr' - - eval_combined = eval_100.append(eval_500) - eval_combined.columns = ['new_feature' if x==str(branch) else x for x in eval_combined.columns] - eval_combined = eval_combined.filter(items=stat_list) - eval_combined = eval_combined.reindex(columns=stat_list) - eval_combined['site'] = str(site) - eval_combined['branch'] = str(branch) - eval_all = eval_all.append(eval_combined) - - if not os.path.exists(outfolder): - os.makedirs(outfolder) - eval_all.to_csv(os.path.join(outfolder,'ble_stats_comparison.csv')) - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Collect eval stats for BLE sites') - parser.add_argument('-b','--huclist', help='list of ble sites to test', required=True,type=str) - parser.add_argument('-e','--branch', help='list of outfolder(s)', required=False,type=str) - parser.add_argument('-d','--current-dev',help='name of current dev stat column',required=True,type=str) - parser.add_argument('-f','--outfolder',help='output folder',required=True,type=str) - - args = vars(parser.parse_args()) - - huclist = args['huclist'] - branch = args['branch'] - current_dev = args['current_dev'] - outfolder = args['outfolder'] - - subset_vector_layers(huclist,branch,current_dev,outfolder) diff --git a/tests/ble_autoeval.sh b/tests/ble_autoeval.sh deleted file mode 100755 index ea8af2d94..000000000 --- a/tests/ble_autoeval.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/bin/bash -e -: -usage () -{ - echo 'Automate runs of run_test_case.py and the aggregation of metrics for list of BLE sites.' 
- echo 'Usage : ble_auto_eval.sh [REQ: -f -b -d -s -v ] [OPT: -h -j ]' - echo '' - echo 'REQUIRED:' - echo ' -f/--fim-outfolder : fim output directory(s)' - echo ' -b/--ble-list : list of ble sites to evaluate' - echo ' -d/--current-dev : current archived dev stats column name' - echo ' -s/--outfolder : outfolder name' - echo ' -v/--version : version eval results. options are Options: "DEV" or "PREV"' - echo '' - echo 'OPTIONS:' - echo ' -h/--help : help file' - echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time. 1 outputs' - echo ' stdout and stderr to terminal and logs. With >1 outputs progress and logs the rest' - exit -} - -if [ "$#" -lt 7 ] -then - usage -fi - -while [ "$1" != "" ]; do -case $1 -in - -f|--fim_outfolder) - shift - fim_outfolder="$1" - ;; - -b|--ble_list) - shift - ble_list="$1" - ;; - -d|--current_dev) - shift - current_dev="$1" - ;; - -s|--outfolder) - shift - outfolder="$1" - ;; - -v|--version) - shift - version="$1" - ;; - -j|--jobLimit) - shift - jobLimit=$1 - ;; - -h|--help) - shift - usage - ;; - esac - shift -done - -export testDir='foss_fim/tests' - - -for branch in $fim_outfolder -do - echo "processing feature branch: $branch" - - while read p; do - # Run Eval - if [ -d "/data/outputs/$branch/$p" ] - then - echo "processing ble for $branch/$p" - python3 /$testDir/run_test_case.py -r $branch/$p -t $p"_ble" -b $branch -c - fi - - if [ -d "/data/outputs/$branch/$(echo $p| cut -b 1-6)" ] - then - echo "processing ble for $branch/$(echo $p| cut -b 1-6)" - python3 /$testDir/run_test_case.py -r $branch/$(echo $p| cut -b 1-6) -t $p"_ble" -b $branch -c - fi - done <$ble_list -done - -echo "combining ble metrics" -python3 /$testDir/all_ble_stats_comparison.py -b $ble_list -e "$fim_outfolder" -d $current_dev -f $outfolder - -echo "calculating aggregate metrics" -python3 /$testDir/aggregate_metrics.py -c $version -b "$fim_outfolder" -u $ble_list -f $outfolder diff --git a/tests/cache_metrics.py b/tests/cache_metrics.py deleted file mode 100644 index 63571977f..000000000 --- a/tests/cache_metrics.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python3 - -import os -import argparse - -from run_test_case import run_alpha_test -from multiprocessing import Pool - -TEST_CASES_DIR = r'/data/test_cases/' -PREVIOUS_FIM_DIR = r'/data/previous_fim' -OUTPUTS_DIR = r'/data/outputs' - - -def process_alpha_test(args): - - fim_run_dir = args[0] - branch_name = args[1] - test_id = args[2] - magnitude = args[3] - archive_results = args[4] - - mask_type = 'huc' - - if archive_results == False: - compare_to_previous = True - else: - compare_to_previous = False - - try: - run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_previous=compare_to_previous, archive_results=archive_results, mask_type=mask_type) - except Exception as e: - print(e) - - -if __name__ == '__main__': - - # Parse arguments. - parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') - parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=True) - parser.add_argument('-v','--fim-version',help='Name of fim version to cache.',required=False, default="all") - parser.add_argument('-j','--job-number',help='Number of processes to use. 
Default is 1.',required=False, default="1") - parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="") - parser.add_argument('-b','--benchmark-category',help='Options include ble or ahps. Defaults to process both.',required=False, default=['ble', 'ahps']) - - test_cases_dir_list = os.listdir(TEST_CASES_DIR) - - args = vars(parser.parse_args()) - - config = args['config'] - fim_version = args['fim_version'] - job_number = int(args['job_number']) - special_string = args['special_string'] - benchmark_category = args['benchmark_category'] - - if fim_version != "all": - previous_fim_list = [fim_version] - else: - previous_fim_list = os.listdir(PREVIOUS_FIM_DIR) - - if config == 'PREV': - archive_results = True - elif config == 'DEV': - archive_results = False - else: - print('Config (-c) option incorrectly set. Use "DEV" or "PREV"') - - if type(benchmark_category) != list: - benchmark_category = [benchmark_category] - - procs_list = [] - for test_id in test_cases_dir_list: - if 'validation' and 'other' not in test_id: - - current_huc = test_id.split('_')[0] - - if test_id.split('_')[1] in benchmark_category: - - for branch_name in previous_fim_list: - - if config == 'DEV': - fim_run_dir = os.path.join(OUTPUTS_DIR, branch_name, current_huc) - elif config == 'PREV': - fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, branch_name, current_huc) - - if os.path.exists(fim_run_dir): - - if special_string != "": - branch_name = branch_name + '_' + special_string - - if 'ble' in test_id: - magnitude = ['100yr', '500yr'] - elif 'ahps' in test_id: - magnitude = ['action', 'minor', 'moderate', 'major'] - else: - continue - - print("Adding " + test_id + " to list of test_ids to process...") - if job_number > 1: - procs_list.append([fim_run_dir, branch_name, test_id, magnitude, archive_results]) - else: - process_alpha_test([fim_run_dir, branch_name, test_id, magnitude, archive_results]) - - else: - print("No test_ids were found for the provided benchmark category: " + str(benchmark_category)) - - if job_number > 1: - pool = Pool(job_number) - pool.map(process_alpha_test, procs_list) \ No newline at end of file diff --git a/tests/comparing_src.py b/tests/comparing_src.py deleted file mode 100755 index a9c8a1c8a..000000000 --- a/tests/comparing_src.py +++ /dev/null @@ -1,391 +0,0 @@ -import matplotlib.pyplot as plt -import numpy as np -import json -import geopandas as gpd -import pandas as pd -from raster import Raster -import os -from shapely.geometry import Point - -projectDirectory = os.path.join(os.path.expanduser('~'),'projects','foss_fim') -dataDirectory = os.path.join(projectDirectory,'data') - -# nwm_catchments_fileName = os.path.join(dataDirectory,'nwm','NWMCatchment.shp') -# nwm_flows_fileName = os.path.join(dataDirectory,'test2','inputs','nwm_flows_proj_120903_v2.gpkg') -# -# esri_catchments_fileName = os.path.join(projectDirectory,'tests','CatchmentH.shp') -esri_flows_fileName = os.path.join(projectDirectory,'tests','eval_1','final_esri_hand_outputs','FPRiver.gpkg') - -# foss_catchments_fileName = os.path.join(dataDirectory,'test2','outputs','gw_catchments_reaches_clipped_addedAttributes_crosswalked.gpkg') -# foss_raster_catchments_fileName = os.path.join(dataDirectory,'test2','outputs','gw_catchments_reaches_clipped_addedAttributes.tif') -foss_flows_fileName = os.path.join(dataDirectory,'test2','outputs','demDerived_reaches_split_clipped_addedAttributes_crosswalked.gpkg') -foss_flows_fileName = 
os.path.join(dataDirectory,'test2','outputs','NHDPlusBurnLineEvent_subset_split_clipped_addedAttributes_crosswalked.gpkg') -# foss_flows_fileName = os.path.join(dataDirectory,'test2','outputs_v32','demDerived_reaches_split_clipped_addedAttributes_crosswalked.gpkg') - -esri_src_fileName = os.path.join(projectDirectory,'tests','eval_1','final_esri_hand_outputs','120903_channel_properties.json') -foss_src_fileName = os.path.join(dataDirectory,'test2','outputs','src.json') -# foss_src_fileName = os.path.join(dataDirectory,'test2','outputs_v32','src.json') - -esri_src_table_fileName = os.path.join(projectDirectory,'tests','eval_1','final_esri_hand_outputs','pf_ModelStream.csv') -foss_src_table_fileName = os.path.join(dataDirectory,'test2','outputs','src_full_crosswalked.csv') -# foss_src_table_fileName = os.path.join(dataDirectory,'test2','outputs_v32','src_full_crosswalked.csv') - -esri_cw_fileName = os.path.join(projectDirectory,'tests','eval_1','final_esri_hand_outputs','cross_walk_table_esri_120903.csv') -foss_cw_fileName = os.path.join(dataDirectory,'test2','outputs','crosswalk_table.csv') -# foss_cw_fileName = os.path.join(dataDirectory,'test2','outputs_v32','crosswalk_table.csv') - -esri_rem_fileName = os.path.join(projectDirectory,'tests','eval_1','final_esri_hand_outputs','hand_120903.tif') -foss_rem_fileName = os.path.join(dataDirectory,'test2','outputs','rem_clipped_zeroed_masked.tif') - -forecast_100_fileName = os.path.join(projectDirectory,'tests','eval_1','validation_data','forecast_120903_100yr.csv') - -# catchments -# esri_catchments = gpd.read_file(esri_catchments_fileName) -# foss_catchments = gpd.read_file(foss_catchments_fileName) -# # foss_raster_catchments = Raster(foss_raster_catchments_fileName) -# nwm_catchments = gpd.read_file(nwm_catchments_fileName) -# -# # flows -esri_flows = gpd.read_file(esri_flows_fileName) -foss_flows = gpd.read_file(foss_flows_fileName) -# nwm_flows = gpd.read_file(nwm_flows_fileName) - -# slopes -# esri_slope=Raster('eval_1/final_esri_hand_outputs/unitrun/hdr.adf') -# foss_slope=Raster('../data/test2/outputs/slopes_d8_thalwegCond_filled_clipped_masked.tif') -# -# foss_cell_area = abs(foss_rem.gt[1]*foss_rem.gt[5]) -# esri_cell_area = abs(esri_rem.gt[1] * esri_rem.gt[5]) -# -# foss_dv_bool = foss_rem.array!=foss_rem.ndv -# esri_dv_bool = esri_rem.array!=esri_rem.ndv -# -# esri_slope.array = esri_slope.array[esri_dv_bool] - 1 -# -# foss_slopes_trans = np.sqrt(1+(foss_slope.array[foss_dv_bool])**2) -# esri_slopes_trans = np.sqrt(1+(esri_slope.array[esri_dv_bool])**2) -# for d in np.array(range(0,30))*.3048: -# foss_area = np.sum(np.logical_and(foss_dv_bool,foss_rem.array<=d) * foss_cell_area * foss_slopes_trans) -# esri_area = np.sum(np.logical_and(esri_dv_bool,esri_rem.array<=d) * esri_cell_area * esri_slopes_trans) - - - - - -# sinuosity - -def sinuosity(flows_geometry): - - numberOfGeoms = len(flows_geometry) - arc_lengths = [-1] * numberOfGeoms ; straight_lengths = [-1] * numberOfGeoms - - for i,geom in enumerate(flows_geometry): - arc_lengths[i] = geom.length - - point_1 = Point(*geom.bounds[0:2]) - point_2 = Point(*geom.bounds[2:4]) - - straight_lengths[i] = point_1.distance(point_2) - - sinuosity_table = pd.DataFrame({'arc_lengths' : arc_lengths , 'straight_lengths' : straight_lengths}) - - return(sinuosity_table) - -esri_sinuosity = sinuosity(esri_flows.geometry) -foss_sinuosity = sinuosity(foss_flows.geometry) - -avg_esri_si = (esri_sinuosity['arc_lengths']/esri_sinuosity['straight_lengths']).mean() -avg_foss_si = 
(foss_sinuosity['arc_lengths']/foss_sinuosity['straight_lengths']).mean() - -print(avg_esri_si,avg_foss_si,avg_esri_si/avg_foss_si) - - -# SRCS's -with open(esri_src_fileName,'r') as f: - esri_src = json.load(f) - -with open(foss_src_fileName,'r') as f: - foss_src = json.load(f) - -esri_cw = pd.read_csv(esri_cw_fileName,dtype=int) -foss_cw = pd.read_csv(foss_cw_fileName,dtype=int) - -esri_rem = Raster(esri_rem_fileName) -foss_rem = Raster(foss_rem_fileName) - -esri_src_table = pd.read_csv(esri_src_table_fileName,dtype={'A':float, 'B':float, 'H':float, 'Length_m':float, 'P':float, 'R':float, 'HydroID':int, 'Q':float}) -foss_src_table = pd.read_csv(foss_src_table_fileName,dtype={'HydroID':int, 'Stage':float, 'Number of Cells':int, 'SurfaceArea (m2)':float, - 'BedArea (m2)':float, 'Volume (m3)':float, 'SLOPE':float, 'LENGTHKM':float, 'AREASQKM':float, - 'Roughness':float, 'TopWidth (m)':float, 'WettedPerimeter (m)':float, 'WetArea (m2)':float, - 'HydraulicRadius (m)':float, 'Discharge (m3s-1)':float, 'feature_id':int}) - -forecast_100 = pd.read_csv(forecast_100_fileName,dtype={'feature_id' : int , 'discharge' : float}) - -intersection_of_feature_id = list(set(esri_cw['feature_id'].unique()) & set(foss_cw['feature_id'].unique()) & set(forecast_100['feature_id'].unique()) ) - - -max_q = np.max(forecast_100['discharge']) -# print(max_q) - -esri_src_table['BA'] = esri_src_table['P'] * esri_src_table['Length_m'] -esri_src_table['V'] = esri_src_table['A'] * esri_src_table['Length_m'] - -esri_src_table = esri_src_table[:][esri_src_table['H']<=10] -foss_src_table = foss_src_table[:][foss_src_table['Stage']<=10] -# print(esri_src_table.sort_values(by=['HydroID','H'])) - -esri_cw = esri_cw[:][esri_cw['feature_id'].isin(intersection_of_feature_id)] -foss_cw = foss_cw[:][foss_cw['feature_id'].isin(intersection_of_feature_id)] - -esri_src_table = esri_src_table.merge(esri_cw,on='HydroID',how='inner') -foss_src_table = foss_src_table.merge(foss_cw,on='HydroID',how='inner') - -foss_src_table.drop(columns='feature_id_y',inplace=True) -foss_src_table.rename(columns={'feature_id_x':'feature_id'},inplace=True) -# esri_hids = esri_cw['HydroID'][esri_cw['feature_id'].isin(intersection_of_feature_id)] -# foss_hids = foss_cw['HydroID'][foss_cw['feature_id'].isin(intersection_of_feature_id)] - -# esri_src_table = esri_src_table[:][esri_src_table['HydroID'].isin(esri_hids)] -# foss_src_table = foss_src_table[:][foss_src_table['HydroID'].isin(foss_hids)] - -# esri_src_table = esri_src_table[:][esri_src_table['HydroID'].isin(esri_hids)] -# foss_src_table = foss_src_table[:][foss_src_table['HydroID'].isin(foss_hids)] - -foss_src_table['Length_m'] = foss_src_table['LENGTHKM'] *1000 -esri_src_table = esri_src_table.merge(esri_flows[['HydroID','S0']],on='HydroID',how='left') - -foss_src_table.rename(columns={'Stage' : 'H' , 'BedArea (m2)' : 'BA','Volume (m3)' : 'V' , - 'SLOPE' : 'S0' , 'WettedPerimeter (m)': 'P', 'WetArea (m2)' : 'A', - 'HydraulicRadius (m)':'R', 'Discharge (m3s-1)': 'Q'},inplace=True) - -foss_src_table = foss_src_table[['H' , 'BA','V' ,'S0' ,'P','A','R','Q','feature_id','HydroID','Length_m']] - -foss_src_table['n'] = 0.06 -esri_src_table['n'] = 0.06 - -# esri_src_table.sort_values(by=['HydroID','H'],inplace=True) -# foss_src_table.sort_values(by=['HydroID','H'],inplace=True) - -esri_src_table.drop(columns='HydroID',inplace=True) -foss_src_table.drop(columns='HydroID',inplace=True) - -esri_src_table = esri_src_table.astype({'H' : str}) -foss_src_table = foss_src_table.astype({'H' : str}) -# esri_src_table 
= esri_src_table.groupby(['feature_id','H']).mean() -# foss_src_table = foss_src_table.groupby(['feature_id','H']).mean() -# esri_src_table = esri_src_table.astype({'H' :float}) -# foss_src_table = foss_src_table.astype({'H' :float}) - -# esri_src_table.reset_index(drop=True) -# foss_src_table.reset_index(drop=True) - -src_table = foss_src_table.merge(esri_src_table,suffixes=('_foss','_esri'),on=['feature_id','H']) -# esri_src_table.sort_values(by=['HydroID','H'],inplace=True) - -# src_table.sort_values(by=['feature_id','H'],inplace=True) -# src_table.reset_index(drop=False,inplace=True) - -src_table = src_table.groupby('H').mean() -src_table.reset_index(drop=False,inplace=True) -src_table = src_table.astype({'H' :float}) -src_table.sort_values(by=['H'],inplace=True) -# print(src_table.index) - -pd.set_option('display.max_rows', 2000) -# print(src_table[['feature_id','H','V_esri','V_foss']].iloc[0:200,:]) -# print(src_table) -percent_error_V = 100 * (src_table['V_foss'].iloc[1:]-src_table['V_esri'].iloc[1:])/src_table['V_esri'].iloc[1:] -percent_error_BA = 100 * (src_table['BA_foss'].iloc[1:]-src_table['BA_esri'].iloc[1:])/src_table['BA_esri'].iloc[1:] -percent_error_L = 100 * (src_table['Length_m_foss']-src_table['Length_m_esri'])/src_table['Length_m_esri'] -percent_error_S = 100 * (src_table['S0_foss']-src_table['S0_esri'])/src_table['S0_esri'] -percent_error_Q = 100 * (src_table['Q_foss'].iloc[1:]-src_table['Q_esri'].iloc[1:])/src_table['Q_esri'].iloc[1:] - -multiplied_error_V = (src_table['V_foss'].iloc[1:]/src_table['V_esri'].iloc[1:])**(5/3) -multiplied_error_BA = (src_table['BA_foss'].iloc[1:]/src_table['BA_esri'].iloc[1:])**(2/3) -multiplied_error_L = (src_table['Length_m_foss']/src_table['Length_m_esri']) -multiplied_error_S = (src_table['S0_foss']/src_table['S0_esri'])**(1/2) -multiplied_error_Q = (src_table['Q_foss'].iloc[1:]/src_table['Q_esri'].iloc[1:]) - -print(percent_error_V.mean(),percent_error_BA.mean(),percent_error_L.mean(),percent_error_S.mean(),percent_error_Q.mean()) -print(multiplied_error_V.mean(),multiplied_error_BA.mean(),multiplied_error_L.mean(),multiplied_error_S.mean(),multiplied_error_Q.mean()) -print((multiplied_error_V.mean()*multiplied_error_S.mean())/(multiplied_error_BA.mean()*multiplied_error_L.mean())) -# print(percent_error_V,percent_error_BA,percent_error_L,percent_error_S,percent_error_Q) -# exit() -# -# tot_V_esri = [] ; tot_V_foss = [] -# foss_dv_bool = foss_rem.array!=foss_rem.ndv -# esri_dv_bool = esri_rem.array!=esri_rem.ndv -# for d in np.array(range(0,30))*.3048: -# foss_cell_area = abs(foss_rem.gt[1]*foss_rem.gt[5]) -# esri_cell_area = abs(esri_rem.gt[1] * esri_rem.gt[5]) -# foss_volume = np.sum(d-foss_rem.array[np.logical_and(foss_dv_bool,foss_rem.array<=d)]) * foss_cell_area -# esri_volume = np.sum(d-esri_rem.array[np.logical_and(esri_dv_bool,esri_rem.array<=d)]) * esri_cell_area -# tot_V_esri = tot_V_esri + [esri_volume] ; tot_V_foss = tot_V_foss + [foss_volume] -# -# print(np.array(tot_V_foss).mean()/np.array(tot_V_esri).mean()) -# print((foss_dv_bool.sum() * foss_cell_area) / (esri_dv_bool.sum() * esri_cell_area)) - - - - - -# print(esri_src_table[['feature_id','H','V']].iloc[0:20,:]) -# print(foss_src_table) -# print(esri_src_table) - - - - -# foss_src_table['HydroID'] - -stage_list = foss_src[str(500)]['stage_list'] -maxLength = len(stage_list) - -overall_esri = None -for fid in intersection_of_feature_id: - esri_hid = esri_cw['HydroID'][esri_cw['feature_id'] == fid].to_numpy() - foss_hid = foss_cw['HydroID'][foss_cw['feature_id'] == 
fid].to_numpy() - - # all_esri_q = np.zeros(len(esri_src[str(esri_hid[0])]['stage_list']),dtype=np.float32) - all_esri_q = None - for hid in esri_hid: - current_esri_q = np.array(esri_src[str(hid)]['q_list']) - - if len(current_esri_q) < maxLength: - nan_array = np.repeat(np.nan, maxLength - len(current_esri_q)) - # print(nan_array) - current_esri_q = np.hstack((current_esri_q,nan_array)) - - if len(current_esri_q) > maxLength: - current_esri_q = current_esri_q[0:maxLength] - - if all_esri_q is None: - all_esri_q = current_esri_q - else: - all_esri_q = np.vstack((all_esri_q,current_esri_q)) - - all_foss_q = None - for hid in foss_hid: - - current_foss_q = np.array(foss_src[str(hid)]['q_list']) - - if all_foss_q is None: - all_foss_q = current_foss_q - else: - all_foss_q = np.vstack((all_foss_q,current_foss_q)) - - # print(all_esri_q.shape,all_foss_q.shape) - # print(all_esri_q) - - if len(all_esri_q.shape) == 2: - mean_esri_q = np.nanmean(all_esri_q,axis=0) - - if len(all_foss_q.shape) == 2: - mean_foss_q = np.nanmean(all_foss_q,axis=0) - - # mean_error = mean_foss_q-mean_esri_q - - # print(mean_esri_q.shape,mean_foss_q.shape,mean_error.shape) - - # mean_abs_error = np.absolute(mean_error) - - if overall_esri is None: - # overall_error = mean_error - overall_esri = mean_esri_q - overall_foss = mean_foss_q - # overall_abs_error = mean_abs_error - else: - # print(mean_error,overall_error.shape) - # overall_error = np.vstack((overall_error,mean_error)) - overall_esri = np.vstack((overall_esri,mean_esri_q)) - overall_foss = np.vstack((overall_foss,mean_foss_q)) - # overall_abs_error = np.vstack((overall_abs_error,mean_abs_error)) - -# print(overall_error) -# print(list(overall_error)) -# overall_error_q_list = list(np.nanmean(overall_error,axis=0)) -overall_esri_q_list = list(np.nanmean(overall_esri,axis=0)) -overall_foss_q_list = list(np.nanmean(overall_foss,axis=0)) - -plt.plot(overall_esri_q_list,stage_list,'r') -plt.plot(overall_foss_q_list,stage_list,'b') -# plt.axis([0,max_q*1.1,0,10]) -plt.show() - -exit() - - - - - - - - -# print(np.mean(overall_abs_error,axis=0)) - -# foss_src = pd.read_csv(foss_src_fileName,skip_blank_lines=True,dtype=object) - -# print('\nFeature IDs') -# print("ESRI # of unique catchments: {}".format(len(np.unique(esri_catchments['feature_id'])))) -# print("FOSS # of unique catchments: {}".format(len(np.unique(foss_catchments['feature_id'])))) -# print("NWM # of unique catchments: {}".format(len(np.unique(nwm_catchments['feature_id'])))) -# print("ESRI # of unique flows: {}".format(len(np.unique(esri_flows['feature_id'])))) -# print("FOSS # of unique flows: {}".format(len(np.unique(foss_flows['feature_id'])))) -# print("NWM # of unique flows: {}".format(len(np.unique(nwm_flows['ID'])))) -# print("FOSS # of unique SRC Feature ID: {}".format(len(np.unique(foss_src['feature_id'])))) -# -# print('\nHydroID') -# print("ESRI # of unique catchments: {}".format(len(np.unique(esri_catchments['HydroID'])))) -# print("FOSS # of unique catchments: {}".format(len(np.unique(foss_catchments['HydroID'])))) -# # print("FOSS # of unique catchments in raster: {}".format(len(np.unique(foss_raster_catchments.array[foss_raster_catchments.array!=foss_raster_catchments.ndv])))) -# print("ESRI # of unique flows: {}".format(len(np.unique(esri_flows['HydroID'])))) -# print("FOSS # of unique flows: {}".format(len(np.unique(foss_flows['HydroID'])))) -# print("ESRI # of unique SRC HydroID: {}".format(len(np.unique(list(esri_src.keys()))))) -# print("FOSS # of unique HydroID's: 
{}".format(len(np.unique(foss_src['HydroID'])))) -# -# print(foss_flows['LengthKm'].max()) -# print(foss_flows['LengthKm'].mean()) - -# print(list(esri_src.keys())) - -# print(len(foss_src)) -# plots src's -# unique_feature_ids_in_foss_src = np.unique(foss_src['feature_id']) - -# featID = 5791828 - -# indices_of_feature = np.where(foss_src['feature_id'] == featID) - -# unique_hydro_ids = np.unique(foss_src['HydroID'][indices_of_feature]) - -# hydroID = '822' -# esri_hydroID = '9975' - -# hydroID = '1279' -# esri_hydroID = '10349' - -hydroID = '1268' -esri_hydroID = '10743' - -hydroID = '1269' -esri_hydroID = '10742' - -# indices_of_hydroid = np.where(foss_src['HydroID'] == hydroID)[0] - -foss_stages = foss_src[hydroID]['stage_list'] -foss_discharge = foss_src[hydroID]['q_list'] - -# feature_id = foss_src['feature_id'][indices_of_hydroid[0]] -esri_stages = esri_src[esri_hydroID]['stage_list'] -esri_flows = esri_src[esri_hydroID]['q_list'] - - -plt.plot(foss_discharge,foss_stages,'b') -plt.plot(esri_flows,esri_stages,'r') -plt.show() - -# for hid in unique_hydro_ids: - - - -# for featID in unique_feature_ids_in_foss_src: diff --git a/tests/mannings_calibration_run.sh b/tests/mannings_calibration_run.sh deleted file mode 100755 index 8dc737ee4..000000000 --- a/tests/mannings_calibration_run.sh +++ /dev/null @@ -1,123 +0,0 @@ -#!/bin/bash -e -: -usage () -{ - echo "Calibrate FIM based on Manning's n values" - echo 'Usage : fim_run.sh [REQ: -d -t -g -n ] [OPT: -h]' - echo '' - echo 'REQUIRED:' - echo ' -d/--fimdir : initial run directory with default mannings values' - echo ' -t/--huclist : huc or list of hucs' - echo ' -g/--outdir : output directory for mannings parameter adjustment files' - echo ' -n/--paramfile : parameter set file' - echo '' - echo 'OPTIONS:' - echo ' -h/--help : help file' - echo ' -o/--overwrite : overwrite outputs if already exist' - echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time. 
1 outputs' - exit -} - -if [ "$#" -lt 7 ] -then - usage -fi - -while [ "$1" != "" ]; do -case $1 -in - -d|--fimdir) - shift - fimdir="$1" - ;; - -t|--huclist ) - shift - huclist=$1 - ;; - -g|--outdir ) - shift - outdir=$1 - ;; - -n|--paramfile) - shift - paramfile=$1 - ;; - -h|--help) - shift - usage - ;; - -o|--overwrite) - overwrite=1 - ;; - -j|--jobLimit) - shift - jobLimit=$1 - ;; - *) ;; - esac - shift -done - -# print usage if arguments empty -if [ "$fimdir" = "" ] -then - usage -fi - -# default values -if [ "$jobLimit" = "" ] ; then - jobLimit=1 -fi - -export input_NWM_Catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg -export outdir=$outdir -export testdir="/foss_fim/tests" - -if [ -f "$huclist" ]; then - - while read huc; do - - export huc=$huc - export fimdir=$fimdir - export hucdir="/data/outputs/"$fimdir/$huc - - ## RUN ## - if [ -f "$paramfile" ]; then - if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh :::: $paramfile - else - parallel --eta -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh :::: $paramfile - fi - else - if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh ::: $paramfile - else - parallel --eta -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh ::: $paramfile - fi - fi - done <$huclist - -else - - for huc in $huclist - do - export huc=$huc - export fimdir=$fimdir - export hucdir="/data/outputs/"$fimdir/$huc - - ## RUN ## - if [ -f "$paramfile" ]; then - if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh :::: $paramfile - else - parallel --eta -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh :::: $paramfile - fi - else - if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh ::: $paramfile - else - parallel --eta -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh ::: $paramfile - fi - fi - done -fi diff --git a/tests/mannings_run_by_set.sh b/tests/mannings_run_by_set.sh deleted file mode 100755 index 9ee875e2a..000000000 --- a/tests/mannings_run_by_set.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -e - -param_set="$1" -IFS=',' read -r -a array <<< $param_set - -strorder="${array[1]}" -mannings_row=1+"$strorder" -mannings_value="${array[$mannings_row]}" - -subdir=$outdir/$huc"_"$strorder"_"$mannings_value -mkdir -p $subdir - -$libDir/add_crosswalk.py -d $hucdir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $hucdir/demDerived_reaches_split_filtered.gpkg -s $hucdir/src_base.csv -l $subdir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $subdir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $subdir/src_full_crosswalked.csv -j $subdir/src.json -x $subdir/crosswalk_table.csv -t $subdir/hydroTable.csv -w $hucdir/wbd8_clp.gpkg -b $hucdir/nwm_subset_streams.gpkg -y $hucdir/nwm_catchments_proj_subset.tif -m $param_set -z $input_NWM_Catchments -p FR -c - -python3 foss_fim/tests/run_test_case_calibration.py -r $fimdir/$huc -d $subdir -t $huc"_ble" -b "mannings_calibration"/$strorder/$mannings_value diff --git a/tests/run_test_case.py b/tests/run_test_case.py deleted file mode 100755 index 16f50b882..000000000 --- a/tests/run_test_case.py +++ /dev/null @@ -1,465 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import pandas as pd -import geopandas as gpd -import rasterio -import json -import csv 
-import argparse -import shutil - -from utils.shared_functions import get_contingency_table_from_binary_rasters, compute_stats_from_contingency_table -from inundation import inundate - -TEST_CASES_DIR = r'/data/test_cases/' # Will update. -INPUTS_DIR = r'/data/inputs' -PRINTWORTHY_STATS = ['CSI', 'TPR', 'TNR', 'FAR', 'MCC', 'TP_area_km2', 'FP_area_km2', 'TN_area_km2', 'FN_area_km2', 'contingency_tot_area_km2', 'TP_perc', 'FP_perc', 'TN_perc', 'FN_perc'] -GO_UP_STATS = ['CSI', 'TPR', 'MCC', 'TN_area_km2', 'TP_area_km2', 'TN_perc', 'TP_perc', 'TNR'] -GO_DOWN_STATS = ['FAR', 'FN_area_km2', 'FP_area_km2', 'FP_perc', 'FN_perc'] -OUTPUTS_DIR = os.environ['outputDataDir'] - -ENDC = '\033[m' -TGREEN_BOLD = '\033[32;1m' -TGREEN = '\033[32m' -TRED_BOLD = '\033[31;1m' -TWHITE = '\033[37m' -WHITE_BOLD = '\033[37;1m' -CYAN_BOLD = '\033[36;1m' - - -def profile_test_case_archive(archive_to_check, magnitude, stats_mode): - """ - This function searches multiple directories and locates previously produced performance statistics. - - Args: - archive_to_check (str): The directory path to search. - magnitude (str): Because a benchmark dataset may have multiple magnitudes, this argument defines - which magnitude is to be used when searching for previous statistics. - Returns: - archive_dictionary (dict): A dictionary of available statistics for previous versions of the domain and magnitude. - {version: {agreement_raster: agreement_raster_path, stats_csv: stats_csv_path, stats_json: stats_json_path}} - *Will only add the paths to files that exist. - - """ - - archive_dictionary = {} - - # List through previous version and check for available stats and maps. If available, add to dictionary. - available_versions_list = os.listdir(archive_to_check) - - if len(available_versions_list) == 0: - print("Cannot compare with -c flag because there are no data in the previous_versions directory.") - return - - for version in available_versions_list: - version_magnitude_dir = os.path.join(archive_to_check, version, magnitude) - stats_json = os.path.join(version_magnitude_dir, stats_mode + '_stats.json') - - if os.path.exists(stats_json): - archive_dictionary.update({version: {'stats_json': stats_json}}) - - return archive_dictionary - - -def compute_contingency_stats_from_rasters(predicted_raster_path, benchmark_raster_path, agreement_raster=None, stats_csv=None, stats_json=None, mask_values=None, stats_modes_list=['total_area'], test_id='', mask_dict={}): - """ - This function contains FIM-specific logic to prepare raster datasets for use in the generic get_contingency_table_from_binary_rasters() function. - This function also calls the generic compute_stats_from_contingency_table() function and writes the results to CSV and/or JSON, depending on user input. - - Args: - predicted_raster_path (str): The path to the predicted, or modeled, FIM extent raster. - benchmark_raster_path (str): The path to the benchmark, or truth, FIM extent raster. - agreement_raster (str): Optional. An agreement raster will be written to this path. 0: True Negatives, 1: False Negative, 2: False Positive, 3: True Positive. - stats_csv (str): Optional. Performance statistics will be written to this path. CSV allows for readability and other tabular processes. - stats_json (str): Optional. Performance statistics will be written to this path. JSON allows for quick ingestion into Python dictionary in other processes. - - Returns: - stats_dictionary (dict): A dictionary of statistics produced by compute_stats_from_contingency_table(). 
Statistic names are keys and statistic values are the values. - """ - - # Get cell size of benchmark raster. - raster = rasterio.open(predicted_raster_path) - t = raster.transform - cell_x = t[0] - cell_y = t[4] - cell_area = abs(cell_x*cell_y) - - # Get contingency table from two rasters. - contingency_table_dictionary = get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_raster_path, agreement_raster, mask_values=mask_values, mask_dict=mask_dict) - - stats_dictionary = {} - - for stats_mode in contingency_table_dictionary: - true_negatives = contingency_table_dictionary[stats_mode]['true_negatives'] - false_negatives = contingency_table_dictionary[stats_mode]['false_negatives'] - false_positives = contingency_table_dictionary[stats_mode]['false_positives'] - true_positives = contingency_table_dictionary[stats_mode]['true_positives'] - masked_count = contingency_table_dictionary[stats_mode]['masked_count'] - file_handle = contingency_table_dictionary[stats_mode]['file_handle'] - - # Produce statistics from continency table and assign to dictionary. cell_area argument optional (defaults to None). - mode_stats_dictionary = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area, masked_count) - - # Write the mode_stats_dictionary to the stats_csv. - if stats_csv != None: - stats_csv = os.path.join(os.path.split(stats_csv)[0], file_handle + '_stats.csv') - df = pd.DataFrame.from_dict(mode_stats_dictionary, orient="index", columns=['value']) - df.to_csv(stats_csv) - - # Write the mode_stats_dictionary to the stats_json. - if stats_json != None: - stats_json = os.path.join(os.path.split(stats_csv)[0], file_handle + '_stats.json') - with open(stats_json, "w") as outfile: - json.dump(mode_stats_dictionary, outfile) - - stats_dictionary.update({stats_mode: mode_stats_dictionary}) - - return stats_dictionary - - -def check_for_regression(stats_json_to_test, previous_version, previous_version_stats_json_path, regression_test_csv=None): - - difference_dict = {} - - # Compare stats_csv to previous_version_stats_file - stats_dict_to_test = json.load(open(stats_json_to_test)) - previous_version_stats_dict = json.load(open(previous_version_stats_json_path)) - - for stat, value in stats_dict_to_test.items(): - previous_version_value = previous_version_stats_dict[stat] - stat_value_diff = value - previous_version_value - difference_dict.update({stat + '_diff': stat_value_diff}) - - return difference_dict - - -def run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_previous=False, archive_results=False, mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False): - - # Construct paths to development test results if not existent. - if archive_results: - branch_test_case_dir_parent = os.path.join(TEST_CASES_DIR, test_id, 'performance_archive', 'previous_versions', branch_name) - else: - branch_test_case_dir_parent = os.path.join(TEST_CASES_DIR, test_id, 'performance_archive', 'development_versions', branch_name) - - # Delete the entire directory if it already exists. - if os.path.exists(branch_test_case_dir_parent): - shutil.rmtree(branch_test_case_dir_parent) - - print("Running the alpha test for test_id: " + test_id + ", " + branch_name + "...") - stats_modes_list = ['total_area'] - - fim_run_parent = os.path.join(os.environ['outputDataDir'], fim_run_dir) - assert os.path.exists(fim_run_parent), "Cannot locate " + fim_run_parent - - # Create paths to fim_run outputs for use in inundate(). 
- rem = os.path.join(fim_run_parent, 'rem_zeroed_masked.tif') - catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes.tif') - if mask_type == 'huc': - catchment_poly = '' - else: - catchment_poly = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') - hydro_table = os.path.join(fim_run_parent, 'hydroTable.csv') - - # Map necessary inputs for inundation(). - hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' - - benchmark_category = test_id.split('_')[1] - current_huc = test_id.split('_')[0] # Break off HUC ID and assign to variable. - - # Create list of shapefile paths to use as exclusion areas. - zones_dir = os.path.join(TEST_CASES_DIR, 'other', 'zones') - mask_dict = {'levees': - {'path': os.path.join(zones_dir, 'leveed_areas_conus.shp'), - 'buffer': None, - 'operation': 'exclude' - }, - 'waterbodies': - {'path': os.path.join(zones_dir, 'nwm_v2_reservoirs.shp'), - 'buffer': None, - 'operation': 'exclude', - }, - } - - if not os.path.exists(branch_test_case_dir_parent): - os.mkdir(branch_test_case_dir_parent) - - # If the test_id is AHPS, then identify possible inclusion zones in the HUC. - if benchmark_category == 'ahps': - - ahps_inclusion_zones_dir = os.path.join(branch_test_case_dir_parent, 'ahps_domains') - print(ahps_inclusion_zones_dir) - if not os.path.exists(ahps_inclusion_zones_dir): - os.mkdir(ahps_inclusion_zones_dir) - - ahps_domain_shapefile = os.path.join(TEST_CASES_DIR, 'other', 'zones', 'ahps_domains.shp') - - # Open shapefile, determine the polys in the huc, create a different shapefile for each poly--name according to AHPS. - ahps_domain_obj = gpd.read_file(ahps_domain_shapefile) - ahps_domain_gdf = gpd.GeoDataFrame(ahps_domain_obj) - - # Loop through entries and compare against the huc4_list to get available HUCs within the geopackage domain. - for index, row in ahps_domain_gdf.iterrows(): - huc8_code = row['huc8_code'] - ahps = row['ahps_code'] - - if huc8_code == current_huc: - ahps_domain_subset = ahps_domain_obj[ahps_domain_obj.ahps_code == ahps] - - #.query("ahps_code=='{ahps_code}'".format(ahps_code=ahps_code)) - ahps_domain_subset_output = os.path.join(ahps_inclusion_zones_dir, ahps + '.shp') - ahps_domain_subset.to_file(ahps_domain_subset_output,driver='ESRI Shapefile') - - mask_dict.update({ahps: - {'path': ahps_domain_subset_output, - 'buffer': None, - 'operation': 'include'} - }) - - if inclusion_area != '': - inclusion_area_name = os.path.split(inclusion_area)[1].split('.')[0] # Get layer name - mask_dict.update({inclusion_area_name: {'path': inclusion_area, - 'buffer': int(inclusion_area_buffer), - 'operation': 'include'}}) - # Append the concatenated inclusion_area_name and buffer. - if inclusion_area_buffer == None: - inclusion_area_buffer = 0 - stats_modes_list.append(inclusion_area_name + '_b' + str(inclusion_area_buffer) + 'm') - - # Check if magnitude is list of magnitudes or single value. - magnitude_list = magnitude - if type(magnitude_list) != list: - magnitude_list = [magnitude_list] - - for magnitude in magnitude_list: - # Construct path to validation raster and forecast file. - - benchmark_raster_path = os.path.join(TEST_CASES_DIR, 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_depth_' + magnitude + '.tif') - if not os.path.exists(benchmark_raster_path): # Skip loop instance if the benchmark raster doesn't exist. 
- continue - - branch_test_case_dir = os.path.join(branch_test_case_dir_parent, magnitude) - - os.makedirs(branch_test_case_dir) # Make output directory for branch. - - # Define paths to inundation_raster and forecast file. - inundation_raster = os.path.join(branch_test_case_dir, 'inundation_extent.tif') - forecast = os.path.join(TEST_CASES_DIR, 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_flows_' + magnitude + '.csv') - - # Run inundate. - print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...") - inundate( - rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, - subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, - depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True - ) - - print("-----> Inundation mapping complete.") - predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. - - # Define outputs for agreement_raster, stats_json, and stats_csv. - - agreement_raster, stats_json, stats_csv = os.path.join(branch_test_case_dir, 'total_area_agreement.tif'), os.path.join(branch_test_case_dir, 'stats.json'), os.path.join(branch_test_case_dir, 'stats.csv') - - test_version_dictionary = compute_contingency_stats_from_rasters(predicted_raster_path, - benchmark_raster_path, - agreement_raster, - stats_csv=stats_csv, - stats_json=stats_json, - mask_values=[], - stats_modes_list=stats_modes_list, - test_id=test_id, - mask_dict=mask_dict, - ) - print(" ") - print("Evaluation complete. All metrics for " + test_id + ", " + branch_name + ", " + magnitude + " are available at " + CYAN_BOLD + branch_test_case_dir + ENDC) - print(" ") - - if compare_to_previous: - text_block = [] - # Compare to previous stats files that are available. - archive_to_check = os.path.join(TEST_CASES_DIR, test_id, 'performance_archive', 'previous_versions') - for stats_mode in stats_modes_list: - archive_dictionary = profile_test_case_archive(archive_to_check, magnitude, stats_mode) - - if archive_dictionary == {}: - break - - # Create header for section. - header = [stats_mode] - for previous_version, paths in archive_dictionary.items(): - header.append(previous_version) - header.append(branch_name) - text_block.append(header) - - # Loop through stats in PRINTWORTHY_STATS for left. - for stat in PRINTWORTHY_STATS: - stat_line = [stat] - for previous_version, paths in archive_dictionary.items(): - # Load stats for previous version. - previous_version_stats_json_path = paths['stats_json'] - if os.path.exists(previous_version_stats_json_path): - previous_version_stats_dict = json.load(open(previous_version_stats_json_path)) - - # Append stat for the version to state_line. - stat_line.append(previous_version_stats_dict[stat]) - - - # Append stat for the current version to stat_line. 
- stat_line.append(test_version_dictionary[stats_mode][stat]) - - text_block.append(stat_line) - - text_block.append([" "]) - - regression_report_csv = os.path.join(branch_test_case_dir, 'stats_summary.csv') - with open(regression_report_csv, 'w', newline='') as csvfile: - csv_writer = csv.writer(csvfile) - csv_writer.writerows(text_block) - - print() - print("--------------------------------------------------------------------------------------------------") - - stats_mode = stats_modes_list[0] - try: - last_version_index = text_block[0].index('dev_latest') - except ValueError: - try: - last_version_index = text_block[0].index('fim_2_3_3') - except ValueError: - try: - last_version_index = text_block[0].index('fim_1_0_0') - except ValueError: - print(TRED_BOLD + "Warning: " + ENDC + "Cannot compare " + branch_name + " to a previous version because no authoritative versions were found in previous_versions directory. Future version of run_test_case may allow for comparisons between dev branches.") - print() - continue - - - - for line in text_block: - first_item = line[0] - if first_item in stats_modes_list: - current_version_index = line.index(branch_name) - if first_item != stats_mode: # Update the stats_mode and print a separator. - print() - print() - print("--------------------------------------------------------------------------------------------------") - print() - stats_mode = first_item - print(CYAN_BOLD + current_huc + ": " + magnitude.upper(), ENDC) - print(CYAN_BOLD + stats_mode.upper().replace('_', ' ') + " METRICS" + ENDC) - print() - - color = WHITE_BOLD - metric_name = ' '.center(len(max(PRINTWORTHY_STATS, key=len))) - percent_change_header = '% CHG' - difference_header = 'DIFF' - current_version_header = line[current_version_index].upper() - last_version_header = line[last_version_index].upper() - # Print Header. - print(color + metric_name + " " + percent_change_header.center((7)) + " " + difference_header.center((15)) + " " + current_version_header.center(18) + " " + last_version_header.center(18), ENDC) - # Format and print stat row. - elif first_item in PRINTWORTHY_STATS: - stat_name = first_item.upper().center(len(max(PRINTWORTHY_STATS, key=len))).replace('_', ' ') - current_version = round((line[current_version_index]), 3) - last_version = round((line[last_version_index]) + 0.000, 3) - difference = round(current_version - last_version, 3) - if difference > 0: - symbol = '+' - if first_item in GO_UP_STATS: - color = TGREEN_BOLD - elif first_item in GO_DOWN_STATS: - color = TRED_BOLD - else: - color = TWHITE - if difference < 0: - symbol = '-' - if first_item in GO_UP_STATS: - color = TRED_BOLD - elif first_item in GO_DOWN_STATS: - color = TGREEN_BOLD - else: - color = TWHITE - - if difference == 0 : - symbol, color = '+', TGREEN - percent_change = round((difference / last_version)*100,2) - - print(WHITE_BOLD + stat_name + ENDC + " " + color + (symbol + " {:5.2f}".format(abs(percent_change)) + " %").rjust(len(percent_change_header)), ENDC + " " + color + ("{:12.3f}".format((difference))).rjust(len(difference_header)), ENDC + " " + "{:15.3f}".format(current_version).rjust(len(current_version_header)) + " " + "{:15.3f}".format(last_version).rjust(len(last_version_header)) + " ") - - print() - print() - print() - print("--------------------------------------------------------------------------------------------------") - print() - - -if __name__ == '__main__': - - # Parse arguments. 
- parser = argparse.ArgumentParser(description='Inundation mapping and regression analysis for FOSS FIM. Regression analysis results are stored in the test directory.') - parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) - parser.add_argument('-b', '--branch-name',help='The name of the working branch in which features are being tested',required=True,default="") - parser.add_argument('-t', '--test-id',help='The test_id to use. Format as: HUC_BENCHMARKTYPE, e.g. 12345678_ble.',required=True,default="") - parser.add_argument('-m', '--mask-type', help='Specify \'huc\' (FIM < 3) or \'filter\' (FIM >= 3) masking method', required=False,default="huc") - parser.add_argument('-y', '--magnitude',help='The magnitude to run.',required=False, default="") - parser.add_argument('-c', '--compare-to-previous', help='Compare to previous versions of HAND.', required=False,action='store_true') - parser.add_argument('-a', '--archive-results', help='Automatically copy results to the "previous_version" archive for test_id. For admin use only.', required=False,action='store_true') - parser.add_argument('-i', '--inclusion-area', help='Path to shapefile. Contingency metrics will be produced from pixels inside of shapefile extent.', required=False, default="") - parser.add_argument('-ib','--inclusion-area-buffer', help='Buffer to use when masking contingency metrics with inclusion area.', required=False, default="0") - parser.add_argument('-l', '--light-run', help='Using the light_run option will result in only stat files being written, and NOT grid files.', required=False, action='store_true') - - # Extract to dictionary and assign to variables. - args = vars(parser.parse_args()) - - valid_test_id_list = os.listdir(TEST_CASES_DIR) - - exit_flag = False # Default to False. - print() - - # Ensure test_id is valid. - if args['test_id'] not in valid_test_id_list: - print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided test_id (-t) " + CYAN_BOLD + args['test_id'] + WHITE_BOLD + " is not available." + ENDC) - print(WHITE_BOLD + "Available test_ids include: " + ENDC) - for test_id in valid_test_id_list: - if 'validation' not in test_id.split('_') and 'ble' in test_id.split('_'): - print(CYAN_BOLD + test_id + ENDC) - print() - exit_flag = True - - # Ensure fim_run_dir exists. - if not os.path.exists(os.path.join(os.environ['outputDataDir'], args['fim_run_dir'])): - print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided fim_run_dir (-r) " + CYAN_BOLD + args['fim_run_dir'] + WHITE_BOLD + " could not be located in the 'outputs' directory." + ENDC) - print(WHITE_BOLD + "Please provide the parent directory name for fim_run.sh outputs. These outputs are usually written in a subdirectory, e.g. outputs/123456/123456." + ENDC) - print() - exit_flag = True - - # Ensure inclusion_area path exists. - if args['inclusion_area'] != "" and not os.path.exists(args['inclusion_area']): - print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided inclusion_area (-i) " + CYAN_BOLD + args['inclusion_area'] + WHITE_BOLD + " could not be located." + ENDC) - exit_flag = True - - try: - inclusion_buffer = int(args['inclusion_area_buffer']) - except ValueError: - print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided inclusion_area_buffer (-ib) " + CYAN_BOLD + args['inclusion_area_buffer'] + WHITE_BOLD + " is not a round number." 
+ ENDC) - - if args['magnitude'] == '': - if 'ble' in args['test_id'].split('_'): - args['magnitude'] = ['100yr', '500yr'] - elif 'ahps' in args['test_id'].split('_'): - args['magnitude'] = ['action', 'minor', 'moderate', 'major'] - else: - print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided magnitude (-y) " + CYAN_BOLD + args['magnitude'] + WHITE_BOLD + " is invalid. ble options include: 100yr, 500yr. ahps options include action, minor, moderate, major." + ENDC) - exit_flag = True - - - if exit_flag: - print() - sys.exit() - - - else: - - run_alpha_test(**args) diff --git a/tests/run_test_case_calibration.py b/tests/run_test_case_calibration.py deleted file mode 100755 index cefe80594..000000000 --- a/tests/run_test_case_calibration.py +++ /dev/null @@ -1,456 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import pandas as pd -import rasterio -import json -import csv -import argparse -import shutil - -from utils.shared_functions import get_contingency_table_from_binary_rasters, compute_stats_from_contingency_table -from inundation import inundate - -TEST_CASES_DIR = r'/data/test_cases/' # Will update. -INPUTS_DIR = r'/data/inputs' -PRINTWORTHY_STATS = ['CSI', 'TPR', 'TNR', 'FAR', 'MCC', 'TP_area_km2', 'FP_area_km2', 'TN_area_km2', 'FN_area_km2', 'contingency_tot_area_km2', 'TP_perc', 'FP_perc', 'TN_perc', 'FN_perc'] -GO_UP_STATS = ['CSI', 'TPR', 'MCC', 'TN_area_km2', 'TP_area_km2', 'TN_perc', 'TP_perc', 'TNR'] -GO_DOWN_STATS = ['FAR', 'FN_area_km2', 'FP_area_km2', 'FP_perc', 'FN_perc'] -OUTPUTS_DIR = os.environ['outputDataDir'] - -ENDC = '\033[m' -TGREEN_BOLD = '\033[32;1m' -TGREEN = '\033[32m' -TRED_BOLD = '\033[31;1m' -TWHITE = '\033[37m' -WHITE_BOLD = '\033[37;1m' -CYAN_BOLD = '\033[36;1m' - - -def profile_test_case_archive(archive_to_check, return_interval, stats_mode): - """ - This function searches multiple directories and locates previously produced performance statistics. - - Args: - archive_to_check (str): The directory path to search. - return_interval (str): Because a benchmark dataset may have multiple return intervals, this argument defines - which return interval is to be used when searching for previous statistics. - Returns: - archive_dictionary (dict): A dictionary of available statistics for previous versions of the domain and return interval. - {version: {agreement_raster: agreement_raster_path, stats_csv: stats_csv_path, stats_json: stats_json_path}} - *Will only add the paths to files that exist. - - """ - - archive_dictionary = {} - - # List through previous version and check for available stats and maps. If available, add to dictionary. - available_versions_list = os.listdir(archive_to_check) - - if len(available_versions_list) == 0: - print("Cannot compare with -c flag because there are no data in the previous_versions directory.") - return - - for version in available_versions_list: - version_return_interval_dir = os.path.join(archive_to_check, version, return_interval) - # Initialize dictionary for version and set paths to None by default. - archive_dictionary.update({version: {'agreement_raster': None, - 'stats_csv': None, - 'stats_json': None}}) - # Find stats files and raster files and add to dictionary. 
- agreement_raster = os.path.join(version_return_interval_dir, stats_mode + '_agreement.tif') - stats_csv = os.path.join(version_return_interval_dir, stats_mode + '_stats.csv') - stats_json = os.path.join(version_return_interval_dir, stats_mode + '_stats.json') - - if os.path.exists(agreement_raster): - archive_dictionary[version]['agreement_raster'] = agreement_raster - if os.path.exists(stats_csv): - archive_dictionary[version]['stats_csv'] = stats_csv - if os.path.exists(stats_json): - archive_dictionary[version]['stats_json'] = stats_json - - return archive_dictionary - - -def compute_contingency_stats_from_rasters(predicted_raster_path, benchmark_raster_path, agreement_raster=None, stats_csv=None, stats_json=None, mask_values=None, stats_modes_list=['total_area'], test_id='', exclusion_mask_dict={}): - """ - This function contains FIM-specific logic to prepare raster datasets for use in the generic get_contingency_table_from_binary_rasters() function. - This function also calls the generic compute_stats_from_contingency_table() function and writes the results to CSV and/or JSON, depending on user input. - - Args: - predicted_raster_path (str): The path to the predicted, or modeled, FIM extent raster. - benchmark_raster_path (str): The path to the benchmark, or truth, FIM extent raster. - agreement_raster (str): Optional. An agreement raster will be written to this path. 0: True Negatives, 1: False Negative, 2: False Positive, 3: True Positive. - stats_csv (str): Optional. Performance statistics will be written to this path. CSV allows for readability and other tabular processes. - stats_json (str): Optional. Performance statistics will be written to this path. JSON allows for quick ingestion into Python dictionary in other processes. - - Returns: - stats_dictionary (dict): A dictionary of statistics produced by compute_stats_from_contingency_table(). Statistic names are keys and statistic values are the values. - """ - - # Get cell size of benchmark raster. - raster = rasterio.open(predicted_raster_path) - t = raster.transform - cell_x = t[0] - cell_y = t[4] - cell_area = abs(cell_x*cell_y) - - additional_layers_dict = {} - # Create path to additional_layer. Could put conditionals here to create path according to some version. Simply use stats_mode for now. Must be raster. - if len(stats_modes_list) > 1: - additional_layers_dict = {} - for stats_mode in stats_modes_list: - if stats_mode != 'total_area': - additional_layer_path = os.path.join(TEST_CASES_DIR, test_id, 'additional_layers', 'inclusion_areas', stats_mode + '.tif') - if os.path.exists(additional_layer_path): - additional_layers_dict.update({stats_mode: additional_layer_path}) - else: - print("No " + stats_mode + " inclusion area found for " + test_id + ". Moving on with processing...") - - # Get contingency table from two rasters. 
- contingency_table_dictionary = get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_raster_path, agreement_raster=None, mask_values=mask_values, additional_layers_dict=additional_layers_dict, exclusion_mask_dict=exclusion_mask_dict) - - stats_dictionary = {} - - for stats_mode in contingency_table_dictionary: - true_negatives = contingency_table_dictionary[stats_mode]['true_negatives'] - false_negatives = contingency_table_dictionary[stats_mode]['false_negatives'] - false_positives = contingency_table_dictionary[stats_mode]['false_positives'] - true_positives = contingency_table_dictionary[stats_mode]['true_positives'] - masked_count = contingency_table_dictionary[stats_mode]['masked_count'] - - # Produce statistics from continency table and assign to dictionary. cell_area argument optional (defaults to None). - mode_stats_dictionary = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area, masked_count) - - # Write the mode_stats_dictionary to the stats_csv. - if stats_csv != None: - stats_csv = os.path.join(os.path.split(stats_csv)[0], stats_mode + '_stats.csv') - df = pd.DataFrame.from_dict(mode_stats_dictionary, orient="index", columns=['value']) - df.to_csv(stats_csv) - - # Write the mode_stats_dictionary to the stats_json. - if stats_json != None: - stats_json = os.path.join(os.path.split(stats_csv)[0], stats_mode + '_stats.json') - with open(stats_json, "w") as outfile: - json.dump(mode_stats_dictionary, outfile) - - stats_dictionary.update({stats_mode: mode_stats_dictionary}) - - return stats_dictionary - - -def check_for_regression(stats_json_to_test, previous_version, previous_version_stats_json_path, regression_test_csv=None): - - difference_dict = {} - - # Compare stats_csv to previous_version_stats_file - stats_dict_to_test = json.load(open(stats_json_to_test)) - previous_version_stats_dict = json.load(open(previous_version_stats_json_path)) - - for stat, value in stats_dict_to_test.items(): - previous_version_value = previous_version_stats_dict[stat] - stat_value_diff = value - previous_version_value - difference_dict.update({stat + '_diff': stat_value_diff}) - - return difference_dict - -def run_alpha_test(fim_run_dir, calib_dir, branch_name, test_id, mask_type, return_interval, compare_to_previous=False, run_structure_stats=False, run_levee_stats=False, archive_results=False): - - # Construct paths to development test results if not existent. - if archive_results: - branch_test_case_dir_parent = os.path.join(TEST_CASES_DIR, test_id, 'performance_archive', 'previous_versions', branch_name) - else: - branch_test_case_dir_parent = os.path.join(TEST_CASES_DIR, test_id, 'performance_archive', 'development_versions', branch_name) - - # Delete the entire directory if it already exists. - if os.path.exists(branch_test_case_dir_parent): - shutil.rmtree(branch_test_case_dir_parent) - - print("Running the alpha test for test_id: " + test_id + ", " + branch_name + "...") - stats_modes_list = ['total_area'] - if run_structure_stats: stats_modes_list.append('structures') - if run_levee_stats: stats_modes_list.append('levees') - - fim_run_parent = os.path.join(os.environ['outputDataDir'], fim_run_dir) - fim_run_calib = str(calib_dir) - - assert os.path.exists(fim_run_parent), "Cannot locate " + fim_run_parent - assert os.path.exists(fim_run_calib), "Cannot locate " + fim_run_calib - - # Create paths to fim_run outputs for use in inundate(). 
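# Editor's sketch (hypothetical numbers, not from the original module) of what
# check_for_regression() above produces for two small stats dictionaries:
example_new_stats = {'CSI': 0.61, 'FAR': 0.18}
example_prev_stats = {'CSI': 0.58, 'FAR': 0.21}
example_diffs = {stat + '_diff': round(value - example_prev_stats[stat], 3)
                 for stat, value in example_new_stats.items()}
# -> {'CSI_diff': 0.03, 'FAR_diff': -0.03}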
- rem = os.path.join(fim_run_parent, 'rem_zeroed_masked.tif') - - catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes.tif') - catchment_poly = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') - current_huc = test_id.split('_')[0] - hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' - hydro_table = os.path.join(fim_run_calib, 'hydroTable.csv') - - # Create list of shapefile paths to use as exclusion areas. - zones_dir = os.path.join(TEST_CASES_DIR, 'other', 'zones') - exclusion_mask_dict = {'levees': {'path': os.path.join(zones_dir, 'leveed_areas_conus.shp'), - 'buffer': None - }, - 'waterbodies': {'path': os.path.join(zones_dir, 'nwm_v2_reservoirs.shp'), - 'buffer': None, - } - } - -# # Crosswalk feature_ids to hydroids. -# hydro_table_data = pd.read_csv(hydro_table, header=0) -# ht_feature_id_list = list(hydro_table_data.feature_id) -# ht_hydro_id_list = list(hydro_table_data.HydroID) -# lake_id_list = list(hydro_table_data.LakeID) -# -# # Get list of feature_ids_to_mask. -# feature_ids_to_mask = [] -# for f in range(0, len(lake_id_list)): -# if lake_id_list[f] != -999: -# lake_feature_id = ht_feature_id_list[f] -# if lake_feature_id not in feature_ids_to_mask: -# feature_ids_to_mask.append(lake_feature_id) - - # Remove duplicates and create list of hydro_ids to use as waterbody mask. -# reduced_ht_feature_id_list, reduced_ht_hydro_id_list, hydro_ids_to_mask = [], [], [] -# -# for i in range(0, len(ht_hydro_id_list)): -# if ht_hydro_id_list[i] not in reduced_ht_hydro_id_list: -# reduced_ht_hydro_id_list.append(ht_hydro_id_list[i]) -# reduced_ht_feature_id_list.append(ht_feature_id_list[i]) -# for i in range(0, len(reduced_ht_feature_id_list)): -# ht_feature_id = reduced_ht_feature_id_list[i] -# ht_hydro_id = reduced_ht_hydro_id_list[i] -# if ht_feature_id in feature_ids_to_mask: -# hydro_ids_to_mask.append(ht_hydro_id) - - # Check if return interval is list of return intervals or single value. - return_interval_list = return_interval - if type(return_interval_list) != list: - return_interval_list = [return_interval_list] - - for return_interval in return_interval_list: - # Construct path to validation raster and forecast file. - benchmark_category = test_id.split('_')[1] - benchmark_raster_path = os.path.join(TEST_CASES_DIR, 'validation_data_' + benchmark_category, current_huc, return_interval, benchmark_category + '_huc_' + current_huc + '_depth_' + return_interval + '.tif') - if not os.path.exists(benchmark_raster_path): # Skip loop instance if the benchmark raster doesn't exist. - continue - - branch_test_case_dir = os.path.join(branch_test_case_dir_parent, return_interval) - - os.makedirs(branch_test_case_dir) - - - # Define paths to inundation_raster and forecast file. - inundation_raster = os.path.join(branch_test_case_dir, 'inundation_extent.tif') - forecast = os.path.join(TEST_CASES_DIR, 'validation_data_' + benchmark_category, current_huc, return_interval, benchmark_category + '_huc_' + current_huc + '_flows_' + return_interval + '.csv') - - # Run inundate. 
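# Editor's sketch (hypothetical rows; the exact column names come from the benchmark
# preprocessing, so treat them as an assumption): the forecast CSV handed to inundate()
# below pairs NWM feature_ids with discharges, e.g.
#   feature_id,discharge
#   5791828,342.6
#   5791830,57.1
# inundate() appends the HUC to the output raster name, which is why predicted_raster_path
# is rebuilt from inundation_raster further down.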
- print("-----> Running inundate() to produce modeled inundation extent for the " + return_interval + " return period...") - inundate( - rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, - subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, - depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True - ) - - print("-----> Inundation mapping complete.") - predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. - - # Define outputs for agreement_raster, stats_json, and stats_csv. - - agreement_raster, stats_json, stats_csv = os.path.join(branch_test_case_dir, 'total_area_agreement.tif'), os.path.join(branch_test_case_dir, 'stats.json'), os.path.join(branch_test_case_dir, 'stats.csv') - - test_version_dictionary = compute_contingency_stats_from_rasters(predicted_raster_path, - benchmark_raster_path, - agreement_raster=None, - stats_csv=stats_csv, - stats_json=stats_json, - mask_values=[], - stats_modes_list=stats_modes_list, - test_id=test_id, - exclusion_mask_dict=exclusion_mask_dict - ) - print(" ") - print("Evaluation complete. All metrics for " + test_id + ", " + branch_name + ", " + return_interval + " are available at " + CYAN_BOLD + branch_test_case_dir + ENDC) - print(" ") - - if compare_to_previous: - text_block = [] - # Compare to previous stats files that are available. - archive_to_check = os.path.join(TEST_CASES_DIR, test_id, 'performance_archive', 'previous_versions') - for stats_mode in stats_modes_list: - archive_dictionary = profile_test_case_archive(archive_to_check, return_interval, stats_mode) - - if archive_dictionary == {}: - break - - # Create header for section. - header = [stats_mode] - for previous_version, paths in archive_dictionary.items(): - header.append(previous_version) - header.append(branch_name) - text_block.append(header) - - # Loop through stats in PRINTWORTHY_STATS for left. - for stat in PRINTWORTHY_STATS: - stat_line = [stat] - for previous_version, paths in archive_dictionary.items(): - # Load stats for previous version. - previous_version_stats_json_path = paths['stats_json'] - previous_version_stats_dict = json.load(open(previous_version_stats_json_path)) - - # Append stat for the version to state_line. - stat_line.append(previous_version_stats_dict[stat]) - - # Append stat for the current version to stat_line. - stat_line.append(test_version_dictionary[stats_mode][stat]) - - text_block.append(stat_line) - - text_block.append([" "]) - - regression_report_csv = os.path.join(branch_test_case_dir, 'stats_summary.csv') - with open(regression_report_csv, 'w', newline='') as csvfile: - csv_writer = csv.writer(csvfile) - csv_writer.writerows(text_block) - - print() - print("--------------------------------------------------------------------------------------------------") - - stats_mode = stats_modes_list[0] - - try: - last_version_index = text_block[0].index('dev_latest') - except ValueError: - try: - last_version_index = text_block[0].index('fim_2_3_3') - except ValueError: - try: - last_version_index = text_block[0].index('fim_1_0_0') - except ValueError: - print(TRED_BOLD + "Warning: " + ENDC + "Cannot compare " + branch_name + " to a previous version because no authoritative versions were found in previous_versions directory. 
Future version of run_test_case may allow for comparisons between dev branches.") - print() - continue - - current_version_index = text_block[0].index(branch_name) - - for line in text_block: - first_item = line[0] - if first_item in stats_modes_list: - if first_item != stats_mode: # Update the stats_mode and print a separator. - print() - print() - print("--------------------------------------------------------------------------------------------------") - print() - stats_mode = first_item - print(CYAN_BOLD + current_huc + ": " + return_interval.upper(), ENDC) - print(CYAN_BOLD + stats_mode.upper().replace('_', ' ') + " METRICS" + ENDC) - print() - - color = WHITE_BOLD - metric_name = ' '.center(len(max(PRINTWORTHY_STATS, key=len))) - percent_change_header = '% CHG' - difference_header = 'DIFF' - current_version_header = line[current_version_index].upper() - last_version_header = line[last_version_index].upper() - # Print Header. - print(color + metric_name + " " + percent_change_header.center((7)) + " " + difference_header.center((15)) + " " + current_version_header.center(18) + " " + last_version_header.center(18), ENDC) - # Format and print stat row. - elif first_item in PRINTWORTHY_STATS: - stat_name = first_item.upper().center(len(max(PRINTWORTHY_STATS, key=len))).replace('_', ' ') - current_version = round((line[current_version_index]), 3) - last_version = round((line[last_version_index]) + 0.000, 3) - difference = round(current_version - last_version, 3) - if difference > 0: - symbol = '+' - if first_item in GO_UP_STATS: - color = TGREEN_BOLD - elif first_item in GO_DOWN_STATS: - color = TRED_BOLD - else: - color = TWHITE - if difference < 0: - symbol = '-' - if first_item in GO_UP_STATS: - color = TRED_BOLD - elif first_item in GO_DOWN_STATS: - color = TGREEN_BOLD - else: - color = TWHITE - - if difference == 0 : - symbol, color = '+', TGREEN - percent_change = round((difference / last_version)*100,2) - - print(WHITE_BOLD + stat_name + ENDC + " " + color + (symbol + " {:5.2f}".format(abs(percent_change)) + " %").rjust(len(percent_change_header)), ENDC + " " + color + ("{:12.3f}".format((difference))).rjust(len(difference_header)), ENDC + " " + "{:15.3f}".format(current_version).rjust(len(current_version_header)) + " " + "{:15.3f}".format(last_version).rjust(len(last_version_header)) + " ") - - print() - - print() - print() - print("--------------------------------------------------------------------------------------------------") - print() - - -if __name__ == '__main__': - - # Parse arguments. - parser = argparse.ArgumentParser(description='Inundation mapping and regression analysis for FOSS FIM. Regression analysis results are stored in the test directory.') - parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) - parser.add_argument('-d','--calib-dir',help='Name of directory containing parameter adjustment outputs',required=True) - parser.add_argument('-b', '--branch-name',help='The name of the working branch in which features are being tested',required=True,default="") - parser.add_argument('-t','--test-id',help='The test_id to use. Format as: HUC_BENCHMARKTYPE, e.g. 12345678_ble.',required=True,default="") - parser.add_argument('-m', '--mask-type', help='Specify \'huc\' (FIM < 3) or \'filter\' (FIM >= 3) masking method', required=False,default="huc") - parser.add_argument('-y', '--return-interval',help='The return interval to check. 
Options include: 100yr, 500yr',required=False,default=['10yr', '100yr', '500yr']) - parser.add_argument('-c', '--compare-to-previous', help='Compare to previous versions of HAND.', required=False,action='store_true') - parser.add_argument('-s', '--run-structure-stats', help='Create contingency stats at structures.', required=False,action='store_true') - parser.add_argument('-a', '--archive-results', help='Automatically copy results to the "previous_version" archive for test_id. For admin use only.', required=False,action='store_true') - - # Extract to dictionary and assign to variables. - args = vars(parser.parse_args()) - - valid_test_id_list = os.listdir(TEST_CASES_DIR) - - exit_flag = False # Default to False. - print() - - if args['run_structure_stats']: - print("Run structure stats (-c) not yet supported.") - run_structure_stats = False - - # Ensure test_id is valid. - if args['test_id'] not in valid_test_id_list: - print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided test_id (-t) " + CYAN_BOLD + args['test_id'] + WHITE_BOLD + " is not available." + ENDC) - print(WHITE_BOLD + "Available test_ids include: " + ENDC) - for test_id in valid_test_id_list: - if 'validation' not in test_id.split('_') and 'ble' in test_id.split('_'): - print(CYAN_BOLD + test_id + ENDC) - print() - exit_flag = True - - # Ensure fim_run_dir exists. - if not os.path.exists(os.path.join(os.environ['outputDataDir'], args['fim_run_dir'])): - print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided fim_run_dir (-r) " + CYAN_BOLD + args['fim_run_dir'] + WHITE_BOLD + " could not be located in the 'outputs' directory." + ENDC) - print(WHITE_BOLD + "Please provide the parent directory name for fim_run.sh outputs. These outputs are usually written in a subdirectory, e.g. outputs/123456/123456." + ENDC) - print() - exit_flag = True - - # Ensure calib_dir exists. - if not os.path.exists(args['calib_dir']): - print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided calib_dir (-r) " + CYAN_BOLD + args['calib_dir'] + WHITE_BOLD + " could not be located in the 'outputs' directory." + ENDC) - print(WHITE_BOLD + "Please provide the parent directory name for fim_run.sh outputs. These outputs are usually written in a subdirectory, e.g. outputs/123456/123456." + ENDC) - print() - exit_flag = True - - # Ensure return_interval available. - if args['return_interval'] == '10yr': - print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided return interval (-y) " + CYAN_BOLD + args['return_interval'] + WHITE_BOLD + " is not available." 
+ ENDC) - print() - - exit_flag = True - - if exit_flag: - print() - sys.exit() - else: - run_alpha_test(**args) diff --git a/tests/synthesize_test_cases.py b/tests/synthesize_test_cases.py deleted file mode 100644 index 913ffd6da..000000000 --- a/tests/synthesize_test_cases.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python3 - - -import os -import argparse -from multiprocessing import Pool - -from run_test_case import run_alpha_test -from all_ble_stats_comparison import subset_vector_layers -from aggregate_metrics import aggregate_metrics - -TEST_CASES_DIR = r'/data/test_cases/' -PREVIOUS_FIM_DIR = r'/data/previous_fim' -OUTPUTS_DIR = r'/data/outputs' - - -def process_alpha_test(args): - - fim_run_dir = args[0] - branch_name = args[1] - test_id = args[2] - magnitude = args[3] - archive_results = args[4] - - mask_type = 'huc' - - if archive_results == False: - compare_to_previous = True - else: - compare_to_previous = False - - try: - run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_previous=compare_to_previous, archive_results=archive_results, mask_type=mask_type) - except Exception as e: - print(e) - - -if __name__ == '__main__': - - # Parse arguments. - parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') - parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=True) - parser.add_argument('-v','--fim-version',help='Name of fim version to cache.',required=True) - parser.add_argument('-j','--job-number',help='Number of processes to use. Default is 1.',required=False, default="1") - parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="") - parser.add_argument('-b','--benchmark-category',help='Options include ble or ahps. Defaults to process both.',required=False, default=['ble', 'ahps']) - parser.add_argument('-l','--huc8-list',help='A list of HUC8s to synthesize.',required=True) - parser.add_argument('-d','--current-dev',help='The current dev id.',required=True) - parser.add_argument('-o','--output-folder',help='The directory where synthesis outputs will be written.',required=True) - - test_cases_dir_list = os.listdir(TEST_CASES_DIR) - - args = vars(parser.parse_args()) - - config = args['config'] - branch_name = args['fim_version'] - job_number = int(args['job_number']) - special_string = args['special_string'] - benchmark_category = args['benchmark_category'] - - - if config == 'PREV': - archive_results = True - elif config == 'DEV': - archive_results = False - else: - print('Config (-c) option incorrectly set. 
Use "DEV" or "PREV"') - - if type(benchmark_category) != list: - benchmark_category = [benchmark_category] - - procs_list = [] - for test_id in test_cases_dir_list: - if not any(x in test_id for x in ['validation','other','.lst']):#if 'validation' and 'other' not in test_id: - - current_huc = test_id.split('_')[0] - print(current_huc) - if test_id.split('_')[1] in benchmark_category: - - - if config == 'DEV': - fim_run_dir = os.path.join(OUTPUTS_DIR, branch_name, current_huc) - elif config == 'PREV': - fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, branch_name, current_huc) - - if os.path.exists(fim_run_dir): - - if special_string != "": - branch_name = branch_name + '_' + special_string - - if 'ble' in test_id: - magnitude = ['100yr', '500yr'] - elif 'ahps' in test_id: - magnitude = ['action', 'minor', 'moderate', 'major'] - else: - continue - - print("Adding " + test_id + " to list of test_ids to process...") - if job_number > 1: - procs_list.append([fim_run_dir, branch_name, test_id, magnitude, archive_results]) - else: - process_alpha_test([fim_run_dir, branch_name, test_id, magnitude, archive_results]) - - else: - print("No test_ids were found for the provided benchmark category: " + str(test_id.split('_')[1])) - - # Multiprocess alpha test runs. - if job_number > 1: - pool = Pool(job_number) - pool.map(process_alpha_test, procs_list) - - # Do all_ble_stats_comparison. - subset_vector_layers(args['huc8_list'], branch_name, args['current_dev'], args['output_folder']) - - # Do aggregate_metrics. - aggregate_metrics(config=config, branch=branch_name, hucs=args['huc8_list'], special_string=args['special_string'], outfolder=args['output_folder']) - - - \ No newline at end of file diff --git a/tests/time_and_tee_mannings_calibration.sh b/tests/time_and_tee_mannings_calibration.sh deleted file mode 100755 index d45976cc8..000000000 --- a/tests/time_and_tee_mannings_calibration.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -e - -/usr/bin/time -v $testdir/mannings_run_by_set.sh $1 |& tee -exit ${PIPESTATUS[0]} diff --git a/tests/utils/shapefile_to_raster.py b/tests/utils/shapefile_to_raster.py deleted file mode 100644 index 4d1a61ed9..000000000 --- a/tests/utils/shapefile_to_raster.py +++ /dev/null @@ -1,46 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Tue Jul 14 16:19:26 2020 - -@author: bradford.bates -""" - -# A script to rasterise a shapefile to the same projection & pixel resolution as a reference image. 
-from osgeo import ogr, gdal -import subprocess - -InputVector = r'/data/misc/msft_building_footprints/MSFP_12090301.shp' -OutputImage = r'/data/misc/msft_building_footprints/Result.tif' - -RefImage = r'/data/test_cases/12090301_ble/validation_data/100yr/ble_huc_12090301_inundation_extent_100yr.tif' - -gdalformat = 'GTiff' -datatype = gdal.GDT_Byte -burnVal = 1 #value for the output image pixels -# Get projection info from reference image -Image = gdal.Open(RefImage, gdal.GA_ReadOnly) - -# Open Shapefile -Shapefile = ogr.Open(InputVector) -Shapefile_layer = Shapefile.GetLayer() - -# Rasterise -print("Rasterising shapefile...") -Output = gdal.GetDriverByName(gdalformat).Create(OutputImage, Image.RasterXSize, Image.RasterYSize, 1, datatype, options=['COMPRESS=DEFLATE']) -Output.SetProjection(Image.GetProjectionRef()) -Output.SetGeoTransform(Image.GetGeoTransform()) - -# Write data to band 1 -Band = Output.GetRasterBand(1) -Band.SetNoDataValue(0) -gdal.RasterizeLayer(Output, [1], Shapefile_layer, burn_values=[burnVal]) - -# Close datasets -Band = None -Output = None -Image = None -Shapefile = None - -# Build image overviews -subprocess.call("gdaladdo --config COMPRESS_OVERVIEW DEFLATE "+OutputImage+" 2 4 8 16 32 64", shell=True) -print("Done.") \ No newline at end of file diff --git a/tests/utils/shared_functions.py b/tests/utils/shared_functions.py deleted file mode 100644 index 777575f02..000000000 --- a/tests/utils/shared_functions.py +++ /dev/null @@ -1,442 +0,0 @@ -#!/usr/bin/env python3 - -def compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area=None, masked_count=None): - """ - This generic function takes contingency table metrics as arguments and returns a dictionary of contingency table statistics. - Much of the calculations below were taken from older Python files. This is evident in the inconsistent use of case. - - Args: - true_negatives (int): The true negatives from a contingency table. - false_negatives (int): The false negatives from a contingency table. - false_positives (int): The false positives from a contingency table. - true_positives (int): The true positives from a contingency table. - cell_area (float or None): This optional argument allows for area-based statistics to be calculated, in the case that - contingency table metrics were derived from areal analysis. - - Returns: - stats_dictionary (dict): A dictionary of statistics. Statistic names are keys and statistic values are the values. - Refer to dictionary definition in bottom of function for statistic names. - - """ - - import numpy as np - - total_population = true_negatives + false_negatives + false_positives + true_positives - - # Basic stats. -# Percent_correct = ((true_positives + true_negatives) / total_population) * 100 -# pod = true_positives / (true_positives + false_negatives) - - try: - FAR = false_positives / (true_positives + false_positives) - except ZeroDivisionError: - FAR = "NA" - - try: - CSI = true_positives / (true_positives + false_positives + false_negatives) - except ZeroDivisionError: - CSI = "NA" - - try: - BIAS = (true_positives + false_positives) / (true_positives + false_negatives) - except ZeroDivisionError: - BIAS = "NA" - - # Compute equitable threat score (ETS) / Gilbert Score. 
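# Editor's worked example (hypothetical counts, not from the original module) for the
# equitable threat score computed just below: with TP = 80, FP = 20, FN = 10, TN = 890
# (total population = 1000),
#   a_ref = ((80 + 20) * (80 + 10)) / 1000 = 9.0
#   ETS   = (80 - 9.0) / (80 - 9.0 + 20 + 10) = 71.0 / 101.0 ~= 0.703
# For comparison, the scores computed above give CSI = 80 / 110 ~= 0.727 and
# FAR = 20 / 100 = 0.2 for the same table.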
- try: - a_ref = ((true_positives + false_positives)*(true_positives + false_negatives)) / total_population - EQUITABLE_THREAT_SCORE = (true_positives - a_ref) / (true_positives - a_ref + false_positives + false_negatives) - except ZeroDivisionError: - EQUITABLE_THREAT_SCORE = "NA" - - if total_population == 0: - TP_perc, FP_perc, TN_perc, FN_perc = "NA", "NA", "NA", "NA" - else: - TP_perc = (true_positives / total_population) * 100 - FP_perc = (false_positives / total_population) * 100 - TN_perc = (true_negatives / total_population) * 100 - FN_perc = (false_negatives / total_population) * 100 - - predPositive = true_positives + false_positives - predNegative = true_negatives + false_negatives - obsPositive = true_positives + false_negatives - obsNegative = true_negatives + false_positives - - TP = float(true_positives) - TN = float(true_negatives) - FN = float(false_negatives) - FP = float(false_positives) - try: - MCC = (TP*TN - FP*FN)/ np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)) - except ZeroDivisionError: - MCC = "NA" - - if masked_count != None: - total_pop_and_mask_pop = total_population + masked_count - if total_pop_and_mask_pop == 0: - masked_perc = "NA" - else: - masked_perc = (masked_count / total_pop_and_mask_pop) * 100 - else: - masked_perc = None - - # This checks if a cell_area has been provided, thus making areal calculations possible. - sq_km_converter = 1000000 - - if cell_area != None: - TP_area = (true_positives * cell_area) / sq_km_converter - FP_area = (false_positives * cell_area) / sq_km_converter - TN_area = (true_negatives * cell_area) / sq_km_converter - FN_area = (false_negatives * cell_area) / sq_km_converter - area = (total_population * cell_area) / sq_km_converter - - predPositive_area = (predPositive * cell_area) / sq_km_converter - predNegative_area = (predNegative * cell_area) / sq_km_converter - obsPositive_area = (obsPositive * cell_area) / sq_km_converter - obsNegative_area = (obsNegative * cell_area) / sq_km_converter - positiveDiff_area = predPositive_area - obsPositive_area - - if masked_count != None: - masked_area = (masked_count * cell_area) / sq_km_converter - else: - masked_area = None - - # If no cell_area is provided, then the contingeny tables are likely not derived from areal analysis. 
- else: - TP_area = None - FP_area = None - TN_area = None - FN_area = None - area = None - - predPositive_area = None - predNegative_area = None - obsPositive_area = None - obsNegative_area = None - positiveDiff_area = None - MCC = None - - if total_population == 0: - predPositive_perc, predNegative_perc, obsPositive_perc, obsNegative_perc , positiveDiff_perc = "NA", "NA", "NA", "NA", "NA" - else: - predPositive_perc = (predPositive / total_population) * 100 - predNegative_perc = (predNegative / total_population) * 100 - obsPositive_perc = (obsPositive / total_population) * 100 - obsNegative_perc = (obsNegative / total_population) * 100 - - positiveDiff_perc = predPositive_perc - obsPositive_perc - - if total_population == 0: - prevalence = "NA" - else: - prevalence = (true_positives + false_negatives) / total_population - - try: - PPV = true_positives / predPositive - except ZeroDivisionError: - PPV = "NA" - - try: - NPV = true_negatives / predNegative - except ZeroDivisionError: - NPV = "NA" - - try: - TNR = true_negatives / obsNegative - except ZeroDivisionError: - TNR = "NA" - - try: - TPR = true_positives / obsPositive - - except ZeroDivisionError: - TPR = "NA" - - try: - Bal_ACC = np.mean([TPR,TNR]) - except TypeError: - Bal_ACC = "NA" - - if total_population == 0: - ACC = "NA" - else: - ACC = (true_positives + true_negatives) / total_population - - try: - F1_score = (2*true_positives) / (2*true_positives + false_positives + false_negatives) - except ZeroDivisionError: - F1_score = "NA" - - stats_dictionary = {'true_negatives_count': int(true_negatives), - 'false_negatives_count': int(false_negatives), - 'true_positives_count': int(true_positives), - 'false_positives_count': int(false_positives), - 'contingency_tot_count': int(total_population), - 'cell_area_m2': cell_area, - - 'TP_area_km2': TP_area, - 'FP_area_km2': FP_area, - 'TN_area_km2': TN_area, - 'FN_area_km2': FN_area, - - 'contingency_tot_area_km2': area, - 'predPositive_area_km2': predPositive_area, - 'predNegative_area_km2': predNegative_area, - 'obsPositive_area_km2': obsPositive_area, - 'obsNegative_area_km2': obsNegative_area, - 'positiveDiff_area_km2': positiveDiff_area, - - 'CSI': CSI, - 'FAR': FAR, - 'TPR': TPR, - 'TNR': TNR, - - 'PPV': PPV, - 'NPV': NPV, - 'ACC': ACC, - 'Bal_ACC': Bal_ACC, - 'MCC': MCC, - 'EQUITABLE_THREAT_SCORE': EQUITABLE_THREAT_SCORE, - 'PREVALENCE': prevalence, - 'BIAS': BIAS, - 'F1_SCORE': F1_score, - - 'TP_perc': TP_perc, - 'FP_perc': FP_perc, - 'TN_perc': TN_perc, - 'FN_perc': FN_perc, - 'predPositive_perc': predPositive_perc, - 'predNegative_perc': predNegative_perc, - 'obsPositive_perc': obsPositive_perc, - 'obsNegative_perc': obsNegative_perc, - 'positiveDiff_perc': positiveDiff_perc, - - 'masked_count': int(masked_count), - 'masked_perc': masked_perc, - 'masked_area_km2': masked_area, - - } - - return stats_dictionary - - -def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_raster_path, agreement_raster=None, mask_values=None, mask_dict={}): - """ - Produces contingency table from 2 rasters and returns it. Also exports an agreement raster classified as: - 0: True Negatives - 1: False Negative - 2: False Positive - 3: True Positive - - Args: - benchmark_raster_path (str): Path to the binary benchmark raster. 0 = phenomena not present, 1 = phenomena present, NoData = NoData. - predicted_raster_path (str): Path to the predicted raster. 0 = phenomena not present, 1 = phenomena present, NoData = NoData. 
- - Returns: - contingency_table_dictionary (dict): A Python dictionary of a contingency table. Key/value pair formatted as: - {true_negatives: int, false_negatives: int, false_positives: int, true_positives: int} - - """ - from rasterio.warp import reproject, Resampling - import rasterio - import numpy as np - import os - import rasterio.mask - import geopandas as gpd - from shapely.geometry import box - - print("-----> Evaluating performance across the total area...") - # Load rasters. - benchmark_src = rasterio.open(benchmark_raster_path) - predicted_src = rasterio.open(predicted_raster_path) - predicted_array = predicted_src.read(1) - - benchmark_array_original = benchmark_src.read(1) - - if benchmark_array_original.shape != predicted_array.shape: - benchmark_array = np.empty(predicted_array.shape, dtype=np.int8) - - reproject(benchmark_array_original, - destination = benchmark_array, - src_transform = benchmark_src.transform, - src_crs = benchmark_src.crs, - src_nodata = benchmark_src.nodata, - dst_transform = predicted_src.transform, - dst_crs = predicted_src.crs, - dst_nodata = benchmark_src.nodata, - dst_resolution = predicted_src.res, - resampling = Resampling.nearest) - - predicted_array_raw = predicted_src.read(1) - - # Align the benchmark domain to the modeled domain. - benchmark_array = np.where(predicted_array==predicted_src.nodata, 10, benchmark_array) - - # Ensure zeros and ones for binary comparison. Assume that positive values mean flooding and 0 or negative values mean dry. - predicted_array = np.where(predicted_array==predicted_src.nodata, 10, predicted_array) # Reclassify NoData to 10 - predicted_array = np.where(predicted_array<0, 0, predicted_array) - predicted_array = np.where(predicted_array>0, 1, predicted_array) - - benchmark_array = np.where(benchmark_array==benchmark_src.nodata, 10, benchmark_array) # Reclassify NoData to 10 - - agreement_array = np.add(benchmark_array, 2*predicted_array) - agreement_array = np.where(agreement_array>4, 10, agreement_array) - - del benchmark_src, benchmark_array, predicted_array, predicted_array_raw - - # Loop through exclusion masks and mask the agreement_array. - if mask_dict != {}: - for poly_layer in mask_dict: - - operation = mask_dict[poly_layer]['operation'] - - if operation == 'exclude': - - poly_path = mask_dict[poly_layer]['path'] - buffer_val = mask_dict[poly_layer]['buffer'] - - reference = predicted_src - - bounding_box = gpd.GeoDataFrame({'geometry': box(*reference.bounds)}, index=[0], crs=reference.crs) - #Read layer using the bbox option. CRS mismatches are handled if bbox is passed a geodataframe (which it is). - poly_all = gpd.read_file(poly_path, bbox = bounding_box) - - # Make sure features are present in bounding box area before projecting. Continue to next layer if features are absent. - if poly_all.empty: - continue - - print("-----> Masking at " + poly_layer + "...") - #Project layer to reference crs. - poly_all_proj = poly_all.to_crs(reference.crs) - # check if there are any lakes within our reference raster extent. - if poly_all_proj.empty: - #If no features within reference raster extent, create a zero array of same shape as reference raster. - poly_mask = np.zeros(reference.shape) - else: - #Check if a buffer value is passed to function. - if buffer_val is None: - #If features are present and no buffer is passed, assign geometry to variable. - geometry = poly_all_proj.geometry - else: - #If features are present and a buffer is passed, assign buffered geometry to variable. 
- geometry = poly_all_proj.buffer(buffer_val) - - #Perform mask operation on the reference raster and using the previously declared geometry geoseries. Invert set to true as we want areas outside of poly areas to be False and areas inside poly areas to be True. - in_poly,transform,c = rasterio.mask.raster_geometry_mask(reference, geometry, invert = True) - #Write mask array, areas inside polys are set to 1 and areas outside poly are set to 0. - poly_mask = np.where(in_poly == True, 1,0) - - # Perform mask. - masked_agreement_array = np.where(poly_mask == 1, 4, agreement_array) - - # Get rid of masked values outside of the modeled domain. - agreement_array = np.where(agreement_array == 10, 10, masked_agreement_array) - - contingency_table_dictionary = {} # Initialize empty dictionary. - - # Only write the agreement raster if user-specified. - if agreement_raster != None: - with rasterio.Env(): - profile = predicted_src.profile - profile.update(nodata=10) - with rasterio.open(agreement_raster, 'w', **profile) as dst: - dst.write(agreement_array, 1) - - # Write legend text file - legend_txt = os.path.join(os.path.split(agreement_raster)[0], 'read_me.txt') - - from datetime import datetime - - now = datetime.now() - current_time = now.strftime("%m/%d/%Y %H:%M:%S") - - with open(legend_txt, 'w') as f: - f.write("%s\n" % '0: True Negative') - f.write("%s\n" % '1: False Negative') - f.write("%s\n" % '2: False Positive') - f.write("%s\n" % '3: True Positive') - f.write("%s\n" % '4: Masked area (excluded from contingency table analysis). Mask layers: {mask_dict}'.format(mask_dict=mask_dict)) - f.write("%s\n" % 'Results produced at: {current_time}'.format(current_time=current_time)) - - # Store summed pixel counts in dictionary. - contingency_table_dictionary.update({'total_area':{'true_negatives': int((agreement_array == 0).sum()), - 'false_negatives': int((agreement_array == 1).sum()), - 'false_positives': int((agreement_array == 2).sum()), - 'true_positives': int((agreement_array == 3).sum()), - 'masked_count': int((agreement_array == 4).sum()), - 'file_handle': 'total_area' - - }}) - - # After agreement_array is masked with default mask layers, check for inclusion masks in mask_dict. - if mask_dict != {}: - for poly_layer in mask_dict: - - operation = mask_dict[poly_layer]['operation'] - - if operation == 'include': - poly_path = mask_dict[poly_layer]['path'] - buffer_val = mask_dict[poly_layer]['buffer'] - - reference = predicted_src - - bounding_box = gpd.GeoDataFrame({'geometry': box(*reference.bounds)}, index=[0], crs=reference.crs) - #Read layer using the bbox option. CRS mismatches are handled if bbox is passed a geodataframe (which it is). - poly_all = gpd.read_file(poly_path, bbox = bounding_box) - - # Make sure features are present in bounding box area before projecting. Continue to next layer if features are absent. - if poly_all.empty: - continue - - print("-----> Evaluating performance at " + poly_layer + "...") - #Project layer to reference crs. - poly_all_proj = poly_all.to_crs(reference.crs) - # check if there are any lakes within our reference raster extent. - if poly_all_proj.empty: - #If no features within reference raster extent, create a zero array of same shape as reference raster. - poly_mask = np.zeros(reference.shape) - else: - #Check if a buffer value is passed to function. - if buffer_val is None: - #If features are present and no buffer is passed, assign geometry to variable. 
- geometry = poly_all_proj.geometry - else: - #If features are present and a buffer is passed, assign buffered geometry to variable. - geometry = poly_all_proj.buffer(buffer_val) - - #Perform mask operation on the reference raster and using the previously declared geometry geoseries. Invert set to true as we want areas outside of poly areas to be False and areas inside poly areas to be True. - in_poly,transform,c = rasterio.mask.raster_geometry_mask(reference, geometry, invert = True) - #Write mask array, areas inside polys are set to 1 and areas outside poly are set to 0. - poly_mask = np.where(in_poly == True, 1, 0) - - # Perform mask. - masked_agreement_array = np.where(poly_mask == 0, 4, agreement_array) # Changed to poly_mask == 0 - - # Get rid of masked values outside of the modeled domain. - temp_agreement_array = np.where(agreement_array == 10, 10, masked_agreement_array) - - if buffer_val == None: # The buffer used is added to filename, and 0 is easier to read than None. - buffer_val = 0 - - poly_handle = poly_layer + '_b' + str(buffer_val) + 'm' - - # Write the layer_agreement_raster. - layer_agreement_raster = os.path.join(os.path.split(agreement_raster)[0], poly_handle + '_agreement.tif') - with rasterio.Env(): - profile = predicted_src.profile - profile.update(nodata=10) - with rasterio.open(layer_agreement_raster, 'w', **profile) as dst: - dst.write(temp_agreement_array, 1) - - - # Store summed pixel counts in dictionary. - contingency_table_dictionary.update({poly_handle:{'true_negatives': int((temp_agreement_array == 0).sum()), - 'false_negatives': int((temp_agreement_array == 1).sum()), - 'false_positives': int((temp_agreement_array == 2).sum()), - 'true_positives': int((temp_agreement_array == 3).sum()), - 'masked_count': int((temp_agreement_array == 4).sum()), - 'file_handle': poly_handle - }}) - - return contingency_table_dictionary - diff --git a/tools/.env.template b/tools/.env.template new file mode 100644 index 000000000..048c2283c --- /dev/null +++ b/tools/.env.template @@ -0,0 +1,6 @@ +API_BASE_URL= +EVALUATED_SITES_CSV= +WBD_LAYER= +NWM_FLOWS_MS= +USGS_METADATA_URL= +USGS_DOWNLOAD_URL= diff --git a/tools/.gitignore b/tools/.gitignore new file mode 100644 index 000000000..4c49bd78f --- /dev/null +++ b/tools/.gitignore @@ -0,0 +1 @@ +.env diff --git a/tests/utils/__init__.py b/tools/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from tests/utils/__init__.py rename to tools/__init__.py diff --git a/tools/adjust_rc_with_feedback.py b/tools/adjust_rc_with_feedback.py new file mode 100644 index 000000000..35a924572 --- /dev/null +++ b/tools/adjust_rc_with_feedback.py @@ -0,0 +1,248 @@ +import argparse +import geopandas as gpd +from geopandas.tools import sjoin +import os +import rasterio +import pandas as pd +import numpy as np +import sys +import json + +temp_workspace = r'' +HAND_CRS = 'EPSG:3857' + +def update_rating_curve(fim_directory, output_csv, htable_path, output_src_json_file, huc6): + print("Processing huc --> " + str(huc6)) + log_file.write("\nProcessing huc --> " + str(huc6) + '\n') + df_gmed = pd.read_csv(output_csv) # read csv to import as a dataframe + df_gmed = df_gmed[df_gmed.hydroid != 0] # remove entries that do not have a valid hydroid + + # Read in the hydroTable.csv and check wether it has previously been updated (rename orig columns if needed) + df_htable = pd.read_csv(htable_path) + if 'orig_discharge_cms' in df_htable.columns: + df_htable = 
df_htable[['HydroID','feature_id','stage','orig_discharge_cms','HydraulicRadius (m)','WetArea (m2)','SLOPE','default_ManningN','HUC','LakeID']] + df_htable.rename(columns={'orig_discharge_cms':'discharge_cms','default_ManningN':'ManningN'}, inplace=True) + else: + df_htable = df_htable[['HydroID','feature_id','stage','discharge_cms','HydraulicRadius (m)','WetArea (m2)','SLOPE','ManningN','HUC','LakeID']] + + # loop through the user provided point data --> stage/flow dataframe row by row + for index, row in df_gmed.iterrows(): + df_htable_hydroid = df_htable[df_htable.HydroID == row.hydroid] # filter htable for entries with matching hydroid + find_src_stage = df_htable_hydroid.loc[df_htable_hydroid['stage'].sub(row.hand).abs().idxmin()] # find closest matching stage to the user provided HAND value + # copy the corresponding htable values for the matching stage->HAND lookup + df_gmed.loc[index,'src_stage'] = find_src_stage.stage + df_gmed.loc[index,'ManningN'] = find_src_stage.ManningN + df_gmed.loc[index,'SLOPE'] = find_src_stage.SLOPE + df_gmed.loc[index,'HydraulicRadius_m'] = find_src_stage['HydraulicRadius (m)'] + df_gmed.loc[index,'WetArea_m2'] = find_src_stage['WetArea (m2)'] + df_gmed.loc[index,'discharge_cms'] = find_src_stage.discharge_cms + + ## Create a df of hydroids and featureids + df_hydro_feat = df_htable.groupby(["HydroID"])[["feature_id"]].median() + #print(df_hydro_feat.to_string()) + + ## Calculate roughness using Manning's equation + df_gmed.rename(columns={'ManningN':'ManningN_default','hydroid':'HydroID'}, inplace=True) # rename the previous ManningN column + df_gmed['hydroid_ManningN'] = df_gmed['WetArea_m2']* \ + pow(df_gmed['HydraulicRadius_m'],2.0/3)* \ + pow(df_gmed['SLOPE'],0.5)/df_gmed['flow'] + print('Adjusted Mannings N Calculations -->') + print(df_gmed) + + # Create dataframe to check for erroneous Manning's n values (>0.6 or <0.001) + df_gmed['Mann_flag'] = np.where((df_gmed['hydroid_ManningN'] >= 0.6) | (df_gmed['hydroid_ManningN'] <= 0.001),'Fail','Pass') + df_mann_flag = df_gmed[(df_gmed['hydroid_ManningN'] >= 0.6) | (df_gmed['hydroid_ManningN'] <= 0.001)][['HydroID','hydroid_ManningN']] + print('Here is the df with mann_flag filter:') + print(df_mann_flag.to_string()) + if not df_mann_flag.empty: + log_file.write('!!! Flaged Mannings Roughness values below !!!' 
+'\n') + log_file.write(df_mann_flag.to_string() + '\n') + + # Export csv with the newly calculated Manning's N values + output_calc_n_csv = os.path.join(fim_directory, huc6, 'calc_src_n_vals_' + huc6 + '.csv') + df_gmed.to_csv(output_calc_n_csv,index=False) + + # filter the modified Manning's n dataframe for values out side allowable range + df_gmed = df_gmed[df_gmed['Mann_flag'] == 'Pass'] + + # Merge df with hydroid and featureid crosswalked + df_gmed = df_gmed.merge(df_hydro_feat, how='left', on='HydroID') + + # Create df with the most recent collection time entry + df_updated = df_gmed.groupby(["HydroID"])[['coll_time']].max() + df_updated.rename(columns={'coll_time':'last_updated'}, inplace=True) + + # cacluate median ManningN to handle cases with multiple hydroid entries + df_mann = df_gmed.groupby(["HydroID"])[['hydroid_ManningN']].median() + print('df_mann:') + print(df_mann) + + # Create a df with the median hydroid_ManningN value per feature_id + df_mann_featid = df_gmed.groupby(["feature_id"])[['hydroid_ManningN']].median() + df_mann_featid.rename(columns={'hydroid_ManningN':'featid_ManningN'}, inplace=True) + + # Rename the original hydrotable variables to allow new calculations to use the primary var name + df_htable.rename(columns={'ManningN':'default_ManningN','discharge_cms':'orig_discharge_cms'}, inplace=True) + + ## Check for large variabilty in the calculated Manning's N values (for cases with mutliple entries for a singel hydroid) + df_nrange = df_gmed.groupby('HydroID').agg({'hydroid_ManningN': ['median', 'min', 'max','count']}) + log_file.write('Statistics for Modified Roughness Calcs -->' +'\n') + log_file.write(df_nrange.to_string() + '\n') + log_file.write('----------------------------------------\n\n') + + # Merge the newly caluclated ManningN dataframe with the original hydroTable + df_htable = df_htable.merge(df_mann, how='left', on='HydroID') + df_htable = df_htable.merge(df_mann_featid, how='left', on='feature_id') + df_htable = df_htable.merge(df_updated, how='left', on='HydroID') + + # Create the modify_ManningN column by combining the hydroid_ManningN with the featid_ManningN (use feature_id value if the hydroid is in a feature_id that contains valid hydroid_ManningN value(s)) + df_htable['modify_ManningN'] = np.where(df_htable['hydroid_ManningN'].isnull(),df_htable['featid_ManningN'],df_htable['hydroid_ManningN']) + + # Create the ManningN column by combining the hydroid_ManningN with the default_ManningN (use modified where available) + df_htable['ManningN'] = np.where(df_htable['modify_ManningN'].isnull(),df_htable['default_ManningN'],df_htable['modify_ManningN']) + + # Calculate new discharge_cms with new ManningN + df_htable['discharge_cms'] = df_htable['WetArea (m2)']* \ + pow(df_htable['HydraulicRadius (m)'],2.0/3)* \ + pow(df_htable['SLOPE'],0.5)/df_htable['ManningN'] + + # Replace discharge_cms with 0 or -999 if present in the original discharge + df_htable['discharge_cms'].mask(df_htable['orig_discharge_cms']==0.0,0.0,inplace=True) + df_htable['discharge_cms'].mask(df_htable['orig_discharge_cms']==-999,-999,inplace=True) + + # Export a new hydroTable.csv and overwrite the previous version + out_htable = os.path.join(fim_directory, huc6, 'hydroTable.csv') + df_htable.to_csv(out_htable,index=False) + + # output new src json (overwrite previous) + output_src_json = dict() + hydroID_list = np.unique(df_htable['HydroID']) + + for hid in hydroID_list: + indices_of_hid = df_htable['HydroID'] == hid + stage_list = 
df_htable['stage'][indices_of_hid].astype(float) + q_list = df_htable['discharge_cms'][indices_of_hid].astype(float) + stage_list = stage_list.tolist() + q_list = q_list.tolist() + output_src_json[str(hid)] = { 'q_list' : q_list , 'stage_list' : stage_list } + + with open(output_src_json_file,'w') as f: + json.dump(output_src_json,f,sort_keys=True) + +def ingest_points_layer(points_layer, fim_directory, wbd_path): + + # Read wbd_path and points_layer to determine which HUC6 each point is in. + wbd_huc8_read = gpd.read_file(wbd_path, layer='WBDHU6') + points_layer_read = gpd.read_file(points_layer) + + # Update CRS of points_layer_read. + points_layer_read = points_layer_read.to_crs(HAND_CRS) + wbd_huc8_read = wbd_huc8_read.to_crs(HAND_CRS) + + # Spatial join the two layers. + water_edge_df = sjoin(points_layer_read, wbd_huc8_read) + + # Convert to GeoDataFrame. + gdf = gpd.GeoDataFrame(water_edge_df) + + # Add two columns for X and Y. + gdf['X'] = gdf['geometry'].x + gdf['Y'] = gdf['geometry'].y + + # Extract information into dictionary. + huc6_list = [] + for index, row in gdf.iterrows(): + huc6 = row['HUC6'] + if huc6 not in huc6_list: + huc6_list.append(huc6) + log_file.write(str(huc6) + '\n') + + # Define coords variable to be used in point raster value attribution. + coords = [(x,y) for x, y in zip(water_edge_df.X, water_edge_df.Y)] + + # Define paths to relevant HUC6 HAND data. + for huc6 in huc6_list: + print(huc6) + + # Define paths to relevant HUC6 HAND data and get necessary metadata for point rasterization. + hand_path = os.path.join(fim_directory, huc6, 'hand_grid_' + huc6 + '.tif') + if not os.path.exists(hand_path): + print("HAND grid for " + huc6 + " does not exist.") + continue + catchments_path = os.path.join(fim_directory, huc6, 'catchments_' + huc6 + '.tif') + if not os.path.exists(catchments_path): + print("Catchments grid for " + huc6 + " does not exist.") + continue + htable_path = os.path.join(fim_directory, huc6, 'hydroTable.csv') + if not os.path.exists(htable_path): + print("hydroTable for " + huc6 + " does not exist.") + continue + output_src_json_file = os.path.join(fim_directory, huc6, 'rating_curves_' + huc6 + '.json') + if not os.path.isfile(output_src_json_file): + print("Rating Curve JSON file for " + huc6 + " does not exist.") + continue + +# water_edge_df = water_edge_df[water_edge_df['HUC6'] == huc6] + + # Use point geometry to determine pixel values at catchment and HAND grids. + hand_src = rasterio.open(hand_path) + water_edge_df['hand'] = [h[0] for h in hand_src.sample(coords)] + hand_src.close() + catchments_src = rasterio.open(catchments_path) + water_edge_df['hydroid'] = [c[0] for c in catchments_src.sample(coords)] + #print(water_edge_df) + + # Get median HAND value for appropriate groups. + water_edge_median_ds = water_edge_df.groupby(["hydroid", "flow", "submitter", "coll_time", "flow_unit"])['hand'].median() + + output_csv = os.path.join(fim_directory, huc6, 'user_supplied_n_vals_' + huc6 + '.csv') + + water_edge_median_ds.to_csv(output_csv) + + # 1. Loop and find the corresponding hydroids in the Hydrotable + # 2. Grab slope, wetted area, hydraulic radius, and feature_id that correspond with the matching hydroids and HAND value for the nearest stage + # 3. Calculate new column for new roughness using the above info + # 3b. If multiple flows exist per hydroid, aggregate the resulting Manning Ns + # 3c. If range of resulting Manning Ns is high, notify human + # 4. Update Hydrotable + # 4a. 
Copy default flow and N columns to new columns with "_default" in the field name + # 4b. Overwrite the official flow and N columns with the new calculated values + # 4c. Add last_updated column with timestamp where values were changed, also add "submitter" column + # 5. What do we do in catchments that match the feature_id? + # 5a. If these catchments already have known data, then let it use those. If not, use new calculated Ns. + + update_rating_curve(fim_directory, output_csv, htable_path, output_src_json_file, huc6) + + + +if __name__ == '__main__': + # Parse arguments. + parser = argparse.ArgumentParser(description='Adjusts rating curve given a shapefile containing points of known water boundary.') + parser.add_argument('-p','--points-layer',help='Path to points layer containing known water boundary locations',required=True) + parser.add_argument('-d','--fim-directory',help='Parent directory of FIM-required datasets.',required=True) + parser.add_argument('-w','--wbd-path', help='Path to national HUC6 layer.',required=True) + + # Assign variables from arguments. + args = vars(parser.parse_args()) + points_layer = args['points_layer'] + fim_directory = args['fim_directory'] + wbd_path = args['wbd_path'] + + # Create log file for processing records + print('This may take a few minutes...') + sys.__stdout__ = sys.stdout + log_file = open(os.path.join(fim_directory,'log_rating_curve_adjust.log'),"w") + + ingest_points_layer(points_layer, fim_directory, wbd_path) + + # Open catchment, HAND, and point grids and determine pixel values for Hydroid, HAND value, and discharge value, respectively. + + # Open rating curve file(s). + + # Use three values to determine the hydroid rating curve(s) to update, then update them using a variation of Manning's Equation. + + # Ensure the JSON rating curve is updated and saved (overwitten). Consider adding attributes to document what was performed. 
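# Editor's worked example (hypothetical hydraulic values, not from the original script) of
# the Manning's-equation roughness back-calculation used in update_rating_curve() above:
#   n = WetArea * HydraulicRadius**(2/3) * sqrt(SLOPE) / flow
example_wet_area_m2 = 50.0
example_hydraulic_radius_m = 1.2
example_slope = 0.002
example_flow_cms = 60.0
example_n = (example_wet_area_m2
             * pow(example_hydraulic_radius_m, 2.0 / 3)
             * pow(example_slope, 0.5)
             / example_flow_cms)  # ~0.042, inside the 0.001-0.6 sanity range checked above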
+ + # Close log file + sys.stdout = sys.__stdout__ + log_file.close() diff --git a/tests/aggregate_mannings_calibration.py b/tools/aggregate_mannings_calibration.py similarity index 99% rename from tests/aggregate_mannings_calibration.py rename to tools/aggregate_mannings_calibration.py index f94b1d025..c57b17776 100755 --- a/tests/aggregate_mannings_calibration.py +++ b/tools/aggregate_mannings_calibration.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 - import os import pandas as pd import csv diff --git a/tools/aggregate_metrics.py b/tools/aggregate_metrics.py new file mode 100755 index 000000000..7cc5951b5 --- /dev/null +++ b/tools/aggregate_metrics.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 + +import json +import os +import csv + +import argparse + +TEST_CASES_DIR = r'/data/test_cases_new/' +# TEMP = r'/data/temp' + +# Search through all previous_versions in test_cases +from tools_shared_functions import compute_stats_from_contingency_table + +def create_master_metrics_csv(): + + # Construct header + metrics_to_write = ['true_negatives_count', + 'false_negatives_count', + 'true_positives_count', + 'false_positives_count', + 'contingency_tot_count', + 'cell_area_m2', + 'TP_area_km2', + 'FP_area_km2', + 'TN_area_km2', + 'FN_area_km2', + 'contingency_tot_area_km2', + 'predPositive_area_km2', + 'predNegative_area_km2', + 'obsPositive_area_km2', + 'obsNegative_area_km2', + 'positiveDiff_area_km2', + 'CSI', + 'FAR', + 'TPR', + 'TNR', + 'PPV', + 'NPV', + 'ACC', + 'Bal_ACC', + 'MCC', + 'EQUITABLE_THREAT_SCORE', + 'PREVALENCE', + 'BIAS', + 'F1_SCORE', + 'TP_perc', + 'FP_perc', + 'TN_perc', + 'FN_perc', + 'predPositive_perc', + 'predNegative_perc', + 'obsPositive_perc', + 'obsNegative_perc', + 'positiveDiff_perc', + 'masked_count', + 'masked_perc', + 'masked_area_km2' + ] + + additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] + list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source']] + + for benchmark_type in ['ble', 'ahps']: + + if benchmark_type == 'ble': + + test_cases = r'/data/test_cases' + test_cases_list = os.listdir(test_cases) + # AHPS test_ids + versions_to_aggregate = ['fim_1_0_0', 'fim_2_3_3', 'fim_3_0_0_3_fr_c'] + + for test_case in test_cases_list: + try: + int(test_case.split('_')[0]) + + huc = test_case.split('_')[0] + previous_versions = os.path.join(test_cases, test_case, 'performance_archive', 'previous_versions') + + for magnitude in ['100yr', '500yr']: + for version in versions_to_aggregate: + version_dir = os.path.join(previous_versions, version) + magnitude_dir = os.path.join(version_dir, magnitude) + + if os.path.exists(magnitude_dir): + + magnitude_dir_list = os.listdir(magnitude_dir) + for f in magnitude_dir_list: + if '.json' in f: + flow = 'NA' + nws_lid = "NA" + benchmark_source = 'ble' + sub_list_to_append = [version, nws_lid, magnitude, huc] + full_json_path = os.path.join(magnitude_dir, f) + if os.path.exists(full_json_path): + stats_dict = json.load(open(full_json_path)) + for metric in metrics_to_write: + sub_list_to_append.append(stats_dict[metric]) + sub_list_to_append.append(full_json_path) + sub_list_to_append.append(flow) + sub_list_to_append.append(benchmark_source) + + list_to_write.append(sub_list_to_append) + + except ValueError: + pass + + if benchmark_type == 'ahps': + + test_cases = r'/data/test_cases_ahps_testing' + test_cases_list = os.listdir(test_cases) + # AHPS test_ids + versions_to_aggregate = ['fim_1_0_0_nws_1_21_2021', 'fim_1_0_0_usgs_1_21_2021', + 
'fim_2_x_ms_nws_1_21_2021', 'fim_2_x_ms_usgs_1_21_2021', + 'fim_3_0_0_3_ms_c_nws_1_21_2021', 'fim_3_0_0_3_ms_c_usgs_1_21_2021', + 'ms_xwalk_fill_missing_cal_nws', 'ms_xwalk_fill_missing_cal_usgs'] + + for test_case in test_cases_list: + try: + int(test_case.split('_')[0]) + + huc = test_case.split('_')[0] + previous_versions = os.path.join(test_cases, test_case, 'performance_archive', 'previous_versions') + + for magnitude in ['action', 'minor', 'moderate', 'major']: + for version in versions_to_aggregate: + + if 'nws' in version: + benchmark_source = 'ahps_nws' + if 'usgs' in version: + benchmark_source = 'ahps_usgs' + + version_dir = os.path.join(previous_versions, version) + magnitude_dir = os.path.join(version_dir, magnitude) + + if os.path.exists(magnitude_dir): + magnitude_dir_list = os.listdir(magnitude_dir) + for f in magnitude_dir_list: + if '.json' in f and 'total_area' not in f: + nws_lid = f[:5] + sub_list_to_append = [version, nws_lid, magnitude, huc] + full_json_path = os.path.join(magnitude_dir, f) + flow = '' + if os.path.exists(full_json_path): + # Get flow used to map. + if 'usgs' in version: + parent_dir = 'usgs_1_21_2021' + if 'nws' in version: + parent_dir = 'nws_1_21_2021' + + flow_file = os.path.join(test_cases, parent_dir, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') + if os.path.exists(flow_file): + with open(flow_file, newline='') as csv_file: + reader = csv.reader(csv_file) + next(reader) + for row in reader: + flow = row[1] + if nws_lid == 'mcc01': + print(flow) + + stats_dict = json.load(open(full_json_path)) + for metric in metrics_to_write: + sub_list_to_append.append(stats_dict[metric]) + sub_list_to_append.append(full_json_path) + sub_list_to_append.append(flow) + sub_list_to_append.append(benchmark_source) + list_to_write.append(sub_list_to_append) + + except ValueError: + pass + + with open(output_csv, 'w', newline='') as csvfile: + csv_writer = csv.writer(csvfile) + csv_writer.writerows(list_to_write) + + + +def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfolder=""): + + # Read hucs into list. + if hucs != "": + huc_list = [line.rstrip('\n') for line in open(hucs)] + + else: + huc_list = None + + if config == "DEV": + config_version = "development_versions" + elif config == "PREV": + config_version = "previous_versions" + + # Make directory to store output aggregates. + if special_string != "": + special_string = "_" + special_string + aggregate_output_dir = os.path.join(outfolder, 'aggregate_metrics', branch + '_aggregate_metrics' + special_string) + if not os.path.exists(aggregate_output_dir): + os.makedirs(aggregate_output_dir) + + test_cases_dir_list = os.listdir(TEST_CASES_DIR) + + for magnitude in ['100yr', '500yr', 'action', 'minor', 'moderate', 'major']: + huc_path_list = [['huc', 'path']] + true_positives, true_negatives, false_positives, false_negatives, cell_area, masked_count = 0, 0, 0, 0, 0, 0 + + for test_case in test_cases_dir_list: + + if test_case not in ['other', 'validation_data_ble', 'validation_data_legacy', 'validation_data_ahps']: + branch_results_dir = os.path.join(TEST_CASES_DIR, test_case, 'performance_archive', config_version, branch) + + huc = test_case.split('_')[0] + # Check that the huc is in the list of hucs to aggregate. 
+ if huc_list != None and huc not in huc_list: + continue + + stats_json_path = os.path.join(branch_results_dir, magnitude, 'total_area_stats.json') + + # If there is a stats json for the test case and branch name, use it when aggregating stats. + if os.path.exists(stats_json_path): + json_dict = json.load(open(stats_json_path)) + + true_positives += json_dict['true_positives_count'] + true_negatives += json_dict['true_negatives_count'] + false_positives += json_dict['false_positives_count'] + false_negatives += json_dict['false_negatives_count'] + masked_count += json_dict['masked_count'] + + cell_area = json_dict['cell_area_m2'] + + huc_path_list.append([huc, stats_json_path]) + + + if cell_area == 0: + continue + + # Pass all sums to shared function to calculate metrics. + stats_dict = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area=cell_area, masked_count=masked_count) + + list_to_write = [['metric', 'value']] # Initialize header. + + for stat in stats_dict: + list_to_write.append([stat, stats_dict[stat]]) + + # Map path to output directory for aggregate metrics. + output_file = os.path.join(aggregate_output_dir, branch + '_aggregate_metrics_' + magnitude + special_string + '.csv') + + if cell_area != 0: + with open(output_file, 'w', newline='') as csvfile: + csv_writer = csv.writer(csvfile) + csv_writer.writerows(list_to_write) + csv_writer.writerow([]) + csv_writer.writerows(huc_path_list) + + print() + print("Finished aggregating for the '" + magnitude + "' magnitude. Aggregated metrics over " + str(len(huc_path_list)-1) + " test cases.") + print() + print("Results are at: " + output_file) + print() + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Aggregates a metric or metrics for multiple HUC8s.') + parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=False) + parser.add_argument('-b','--branch',help='Name of branch to check all test_cases for and to aggregate.',required=True) + parser.add_argument('-u','--hucs',help='HUC8s to restrict the aggregation.',required=False, default="") + parser.add_argument('-s','--special_string',help='Special string to add to outputs.',required=False, default="") + parser.add_argument('-f','--outfolder',help='output folder',required=True,type=str) + + args = vars(parser.parse_args()) + + aggregate_metrics(**args) diff --git a/tools/cache_metrics.py b/tools/cache_metrics.py new file mode 100755 index 000000000..0d02fb217 --- /dev/null +++ b/tools/cache_metrics.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 + +import os +import argparse +import traceback + +from run_test_case import run_alpha_test +from multiprocessing import Pool + +TEST_CASES_DIR = r'/data/test_cases_new/' #TODO remove "_new" +PREVIOUS_FIM_DIR = r'/data/previous_fim' +OUTPUTS_DIR = r'/data/outputs' + + +def process_alpha_test(args): + + fim_run_dir = args[0] + version = args[1] + test_id = args[2] + magnitude = args[3] + archive_results = args[4] + + mask_type = 'huc' + + if archive_results == False: + compare_to_previous = True + else: + compare_to_previous = False + + try: + run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=compare_to_previous, archive_results=archive_results, mask_type=mask_type) + except Exception: + traceback.print_exc() + + +if __name__ == '__main__': + + # Parse arguments. 
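+    # Example invocation (version name and job count are illustrative):
+    #   python3 cache_metrics.py -c PREV -v fim_3_0_0_3_fr_c -j 6 -b ble
+    # With -c PREV, results are archived under previous_versions; with -c DEV they are
+    # treated as development versions and compared against the previous results instead.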
+ parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') + parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=True) + parser.add_argument('-v','--fim-version',help='Name of fim version to cache.',required=False, default="all") + parser.add_argument('-j','--job-number',help='Number of processes to use. Default is 1.',required=False, default="1") + parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="") + parser.add_argument('-b','--benchmark-category',help='Options include ble or ahps. Defaults to process both.',required=False, default=None) + + test_cases_dir_list = os.listdir(TEST_CASES_DIR) + + args = vars(parser.parse_args()) + + config = args['config'] + fim_version = args['fim_version'] + job_number = int(args['job_number']) + special_string = args['special_string'] + benchmark_category = args['benchmark_category'] + + if fim_version != "all": + previous_fim_list = [fim_version] + else: + previous_fim_list = os.listdir(PREVIOUS_FIM_DIR) + + if config == 'PREV': + archive_results = True + elif config == 'DEV': + archive_results = False + else: + print('Config (-c) option incorrectly set. Use "DEV" or "PREV"') + + benchmark_category_list = [] + + if benchmark_category == None: + for d in test_cases_dir_list: + if 'test_cases' in d: + benchmark_category_list.append(d.replace('_test_cases', '')) + else: + benchmark_category_list = [benchmark_category] + + procs_list = [] + for bench_cat in benchmark_category_list: + bench_cat_test_case_dir = os.path.join(TEST_CASES_DIR, bench_cat + '_test_cases') + + bench_cat_test_case_list = os.listdir(bench_cat_test_case_dir) + + for test_id in bench_cat_test_case_list: + if 'validation' and 'other' not in test_id: + + current_huc = test_id.split('_')[0] + if test_id.split('_')[1] in bench_cat: + + for version in previous_fim_list: + + if config == 'DEV': + fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc) + elif config == 'PREV': + fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc) + + if not os.path.exists(fim_run_dir): + fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6]) # For previous versions of HAND computed at HUC6 scale + + if os.path.exists(fim_run_dir): + if special_string != "": + version = version + '_' + special_string + + if 'ble' in test_id: + magnitude = ['100yr', '500yr'] + elif 'usgs' or 'nws' in test_id: + magnitude = ['action', 'minor', 'moderate', 'major'] + else: + continue + + print("Adding " + test_id + " to list of test_ids to process...") + if job_number > 1: + procs_list.append([fim_run_dir, version, test_id, magnitude, archive_results]) + else: + process_alpha_test([fim_run_dir, version, test_id, magnitude, archive_results]) + + if job_number > 1: + with Pool(processes=job_number) as pool: + pool.map(process_alpha_test, procs_list) diff --git a/tools/check_deep_flooding.py b/tools/check_deep_flooding.py new file mode 100644 index 000000000..e9bfd746c --- /dev/null +++ b/tools/check_deep_flooding.py @@ -0,0 +1,110 @@ +import argparse +import os +from multiprocessing import Pool +import numpy as np +import rasterio.shutil +from rasterio.warp import calculate_default_transform, reproject, Resampling +import rasterio.crs +import rasterio +import rasterio.mask +import geopandas as gpd +from shapely.geometry import box + + +def check_deep_flooding(args): + + depth_grid_path = args[0] + 
shapefile_path = args[1] + depth_threshold = args[2] + output_dir = args[3] + + print("Checking " + depth_grid_path + "...") + + # Open depth_grid_path and shapefile_path and perform np.wheres + depth_src = rasterio.open(depth_grid_path) + depth_array = depth_src.read(1) + reference = depth_src + + #Read layer using the bbox option. CRS mismatches are handled if bbox is passed a geodataframe (which it is). + bounding_box = gpd.GeoDataFrame({'geometry': box(*reference.bounds)}, index=[0], crs=reference.crs) + poly_all = gpd.read_file(shapefile_path, bbox = bounding_box) + + # Make sure features are present in bounding box area before projecting. Continue to next layer if features are absent. + if poly_all.empty: + return + + #Project layer to reference crs. + poly_all_proj = poly_all.to_crs(reference.crs) + # check if there are any lakes within our reference raster extent. + if poly_all_proj.empty: + #If no features within reference raster extent, create a zero array of same shape as reference raster. + poly_mask = np.zeros(reference.shape) + else: + #Perform mask operation on the reference raster and using the previously declared geometry geoseries. Invert set to true as we want areas outside of poly areas to be False and areas inside poly areas to be True. + geometry = poly_all_proj.geometry + in_poly,transform,c = rasterio.mask.raster_geometry_mask(reference, geometry, invert = True) + #Write mask array, areas inside polys are set to 1 and areas outside poly are set to 0. + poly_mask = np.where(in_poly == True, 1,0) + + # Filter depth_array by depth_threshold + filtered_depth_array = np.where(depth_array > depth_threshold, depth_array, -1) + + # Perform mask. + masked_depth_array = np.where(poly_mask == 1, filtered_depth_array, -1) + + if np.amax(masked_depth_array) > 0: + + file_handle = os.path.split(depth_grid_path)[1] + + checked_depth_raster = os.path.join(output_dir, "checked_" + str(depth_threshold) + "_" + file_handle) + + print("Writing " + checked_depth_raster + "...") + # Write output. + with rasterio.Env(): + profile = depth_src.profile + profile.update(nodata=-1) + with rasterio.open(checked_depth_raster, 'w', **profile) as dst: + dst.write(masked_depth_array, 1) + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Checks for deep flooding in a specified shapefile. Requires a directory of depth grids and a shapefile.') + parser.add_argument('-d','--depth-grid-dir',help='Name of directory containing outputs of depth outputs of inundation.py',required=True) + parser.add_argument('-s','--shapefile-path',help='Path to shapefile to be used as the overlay.',required=True) + parser.add_argument('-t','--depth-threshold',help='Depth in meters to use as checking threshold.',required=True) + parser.add_argument('-o', '--output-dir',help='The path to a directory to write the outputs. If not used, the inundation_review directory is used by default -> type=str',required=True, default="") + parser.add_argument('-j', '--job-number',help='The number of jobs',required=False,default=1) + + args = vars(parser.parse_args()) + + depth_grid_dir = args['depth_grid_dir'] + shapefile_path = args['shapefile_path'] + depth_threshold = int(args['depth_threshold']) + output_dir = args['output_dir'] + job_number = int(args['job_number']) + + # Get list of files in depth_grid_dir. + # Loop through files and determine which ones are depth grids, adding them to a list. 
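+    # Example invocation (paths and threshold are illustrative):
+    #   python3 check_deep_flooding.py -d /data/outputs/depth_grids -s /data/aoi_polygons.shp -t 9 -o /data/deep_flooding_checks -j 4
+    # Any file in the depth grid directory with 'depth' in its name is treated as a depth
+    # grid; cells deeper than the -t threshold (meters) that fall inside the shapefile
+    # polygons are written out as "checked_<threshold>_<original name>" rasters.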
+ + depth_grid_dir_list = os.listdir(depth_grid_dir) + if not os.path.exists(output_dir): + os.mkdir(output_dir) + + procs_list = [] + + for f in depth_grid_dir_list: + if 'depth' in f: + full_f_path = os.path.join(depth_grid_dir, f) + +# check_deep_flooding([full_f_path, shapefile_path, depth_threshold, output_dir]) + procs_list.append([full_f_path, shapefile_path, depth_threshold, output_dir]) + + # Multiprocess. + with Pool(processes=job_number) as pool: + pool.map(check_deep_flooding, procs_list) + + + + \ No newline at end of file diff --git a/tools/code_standardizer/Dockerfile b/tools/code_standardizer/Dockerfile new file mode 100755 index 000000000..0c58dd0a9 --- /dev/null +++ b/tools/code_standardizer/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.8.5-slim-buster + +ENV PYTHONUNBUFFERED 1 +RUN mkdir -p /opt/code_standardizer +WORKDIR /opt/code_standardizer + +COPY requirements.txt . +RUN pip install -r requirements.txt --no-cache-dir +COPY . /opt/code_standardizer + +RUN chmod +x /opt/code_standardizer/entrypoint.sh +ENTRYPOINT ["/opt/code_standardizer/entrypoint.sh"] diff --git a/tools/code_standardizer/entrypoint.sh b/tools/code_standardizer/entrypoint.sh new file mode 100755 index 000000000..417a9a08d --- /dev/null +++ b/tools/code_standardizer/entrypoint.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +umask 002 +cd /cahaba + +echo "Starting Code Standardizer" + +echo "Running Python Black..." +black . + +echo "Running iSort..." +isort --profile black . + +echo "Running Flake8..." +flake8 . + +echo " ALL DONE!" diff --git a/tools/code_standardizer/requirements.txt b/tools/code_standardizer/requirements.txt new file mode 100755 index 000000000..c8fd24694 --- /dev/null +++ b/tools/code_standardizer/requirements.txt @@ -0,0 +1,3 @@ +black==21.9b0 +flake8==3.9.2 +isort==5.9.3 diff --git a/tools/composite_ms_fr_inundation.py b/tools/composite_ms_fr_inundation.py new file mode 100644 index 000000000..e0153e143 --- /dev/null +++ b/tools/composite_ms_fr_inundation.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +import os, argparse, rasterio +import numpy as np +import pandas as pd + +from inundation import inundate +from gms_tools.mosaic_inundation import Mosaic_inundation, __append_id_to_file_name + + +def composite_inundation(fim_dir_ms, fim_dir_fr, huc, flows, composite_output_dir, ouput_name='', + bin_rast_flag=False, depth_rast_flag=False, clean=True, quiet=True): + """ + Runs `inundate()` on FIM 3.X mainstem (MS) and full-resolution (FR) outputs and composites results. Assumes that all `fim_run` products + necessary for `inundate()` are in each huc8 folder. + + Parameters + ---------- + fim_dir_ms : str + Path to MS FIM directory. This should be an output directory from `fim_run.sh`. + fim_dir_fr : str + Path to FR FIM directory. This should be an output directory from `fim_run.sh`. + huc : str + HUC8 to run `inundate()`. This should be a folder within both `fim_dir_ms` and `fim_dir_fr`. + flows : str or pandas.DataFrame, can be a single file or a comma-separated list of files + File path to forecast csv or Pandas DataFrame with correct column names. + composite_output_dir : str + Folder path to write outputs. It will be created if it does not exist. + ouput_name : str, optional + Name for output raster. If not specified, by default the raster will be named 'inundation_composite_{flows_root}.tif'. + bin_rast_flag : bool, optional + Flag to create binary raster as output. If no raster flags are passed, this is the default behavior. 
+ depth_rast_flag : bool, optional + Flag to create depth raster as output. + clean : bool, optional + If True, intermediate files are deleted. + quiet : bool, optional + Quiet output. + + Returns + ------- + None + + Raises + ------ + TypeError + Wrong input data types + AssertionError + Wrong input data types + + Notes + ----- + - Specifying a subset of the domain in rem or catchments to inundate on is achieved by the HUCs file or the forecast file. + + Examples + -------- + >>> import composite_ms_fr_inundation + >>> composite_ms_fr_inundation.composite_inundation( + '/home/user/fim_ouput_mainstem', + '/home/user/fim_ouput_fullres', + '12090301', + '/home/user/forecast_file.csv', + '/home/user/fim_inundation_composite', + 'inundation_composite.tif', + True, + False) + """ + # Set inundation raster to True if no output type flags are passed + if not (bin_rast_flag or depth_rast_flag): + bin_rast_flag = True + assert not (bin_rast_flag and depth_rast_flag), 'Output can only be binary or depth grid, not both' + assert os.path.isdir(fim_dir_ms), f'{fim_dir_ms} is not a directory. Please specify an existing MS FIM directory.' + assert os.path.isdir(fim_dir_fr), f'{fim_dir_fr} is not a directory. Please specify an existing FR FIM directory.' + assert os.path.exists(flows), f'{flows} does not exist. Please specify a flow file.' + + # Instantiate output variables + var_keeper = { + 'ms': { + 'dir': fim_dir_ms, + 'outputs': { + 'inundation_rast': os.path.join(composite_output_dir, f'{huc}_inundation_ms.tif') if bin_rast_flag else None, + 'depth_rast': os.path.join(composite_output_dir, f'{huc}_depth_ms.tif') if depth_rast_flag else None + } + }, + 'fr': { + 'dir': fim_dir_fr, + 'outputs': { + 'inundation_rast': os.path.join(composite_output_dir, f'{huc}_inundation_fr.tif') if bin_rast_flag else None, + 'depth_rast': os.path.join(composite_output_dir, f'{huc}_depth_fr.tif') if depth_rast_flag else None + } + } + } + # Build inputs to inundate() based on the input folders and huc + if not quiet: print(f"HUC {huc}") + for extent in var_keeper: + rem = os.path.join(var_keeper[extent]['dir'], huc, 'rem_zeroed_masked.tif') + catchments = os.path.join(var_keeper[extent]['dir'], huc, 'gw_catchments_reaches_filtered_addedAttributes.tif') + catchment_poly = os.path.join(var_keeper[extent]['dir'], huc, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') + hydro_table = os.path.join(var_keeper[extent]['dir'], huc, 'hydroTable.csv') + + # Ensure that all of the required files exist in the huc directory + for file in (rem, catchments, catchment_poly, hydro_table): + if not os.path.exists(file): + raise Exception(f"The following file does not exist within the supplied FIM directory:\n{file}") + + # Run inundation() + extent_friendly = "mainstem (MS)" if extent=="ms" else "full-resolution (FR)" + grid_type = "an inundation" if bin_rast_flag else "a depth" + if not quiet: print(f" Creating {grid_type} map for the {extent_friendly} configuration...") + result = inundate(rem,catchments,catchment_poly,hydro_table,flows,mask_type=None, + inundation_raster= var_keeper[extent]['outputs']['inundation_rast'], + depths= var_keeper[extent]['outputs']['depth_rast'], + quiet= quiet) + if result != 0: + raise Exception(f"Failed to inundate {rem} using the provided flows.") + + # If no output name supplied, create one using the flows file name + if not ouput_name: + flows_root = os.path.splitext(os.path.basename(flows))[0] + ouput_name = os.path.join(composite_output_dir, 
f'inundation_composite_{flows_root}.tif') + else: + ouput_name = os.path.join(composite_output_dir, ouput_name) + + # Composite MS and FR + inundation_map_file = { + 'huc8' : [huc] * 2, + 'branchID' : [None] * 2, + 'inundation_rasters': [var_keeper['fr']['outputs']['inundation_rast'], + var_keeper['ms']['outputs']['inundation_rast']], + 'depths_rasters': [var_keeper['fr']['outputs']['depth_rast'], + var_keeper['ms']['outputs']['depth_rast']] + } + inundation_map_file = pd.DataFrame(inundation_map_file) + Mosaic_inundation( + inundation_map_file, + mosaic_attribute='depths_rasters' if depth_rast_flag else 'inundation_rasters', + mosaic_output=ouput_name, + mask=catchment_poly, + unit_attribute_name='huc8', + nodata=-9999, + workers=1, + remove_inputs=clean, + subset=None,verbose=not quiet + ) + if bin_rast_flag: + hydroid_to_binary(__append_id_to_file_name(ouput_name, huc)) + +def hydroid_to_binary(hydroid_raster_filename): + '''Converts hydroid positive/negative grid to 1/0''' + + #to_bin = lambda x: np.where(x > 0, 1, np.where(x == 0, -9999, 0)) + to_bin = lambda x: np.where(x > 0, 1, np.where(x != -9999, 0, -9999)) + hydroid_raster = rasterio.open(hydroid_raster_filename) + profile = hydroid_raster.profile # get profile for new raster creation later on + profile['nodata'] = -9999 + bin_raster = to_bin(hydroid_raster.read(1)) # converts neg/pos to 0/1 + # Overwrite inundation raster + with rasterio.open(hydroid_raster_filename, "w", **profile) as out_raster: + out_raster.write(bin_raster.astype(hydroid_raster.profile['dtype']), 1) + del hydroid_raster,profile,bin_raster + + +if __name__ == '__main__': + + # parse arguments + parser = argparse.ArgumentParser(description='Inundate FIM 3 full resolution and mainstem outputs using a flow file and composite the results.') + parser.add_argument('-ms','--fim-dir-ms',help='Directory that contains MS FIM outputs.',required=True) + parser.add_argument('-fr','--fim-dir-fr',help='Directory that contains FR FIM outputs.',required=True) + parser.add_argument('-u','--huc',help='HUC within FIM directories to inunundate. Can be a comma-separated list.',required=True) + parser.add_argument('-f','--flows-file',help='File path of flows csv or comma-separated list of paths if running multiple HUCs',required=True) + parser.add_argument('-o','--ouput-dir',help='Folder to write Composite Raster output.',required=True) + parser.add_argument('-n','--ouput-name',help='File name for output(s).',default=None,required=False) + parser.add_argument('-b','--bin-raster',help='Output raster is a binary wet/dry grid. This is the default if no raster flags are passed.',required=False,default=False,action='store_true') + parser.add_argument('-d','--depth-raster',help='Output raster is a depth grid.',required=False,default=False,action='store_true') + parser.add_argument('-j','--num-workers',help='Number of concurrent processesto run.',required=False,default=1,type=int) + parser.add_argument('-c','--clean',help='If flag used, intermediate rasters are NOT cleaned up.',required=False,default=True,action='store_false') + parser.add_argument('-q','--quiet',help='Quiet terminal output.',required=False,default=False,action='store_true') + + # Extract to dictionary and assign to variables. 
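+    # Example invocation (paths are illustrative); one flow file must be supplied per HUC,
+    # in the same order:
+    #   python3 composite_ms_fr_inundation.py -ms /data/outputs/fim_ms -fr /data/outputs/fim_fr \
+    #       -u 12090301,12090302 -f /data/flows/flows_a.csv,/data/flows/flows_b.csv \
+    #       -o /data/outputs/composite -b -j 2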
+ args = vars(parser.parse_args()) + fim_dir_ms = args['fim_dir_ms'] + fim_dir_fr = args['fim_dir_fr'] + hucs = args['huc'].replace(' ', '').split(',') + flows_files = args['flows_file'].replace(' ', '').split(',') + num_workers = int(args['num_workers']) + output_dir = args['ouput_dir'] + ouput_name = args['ouput_name'] + bin_raster = bool(args['bin_raster']) + depth_raster = bool(args['depth_raster']) + clean = bool(args['clean']) + quiet = bool(args['quiet']) + + assert num_workers >= 1, "Number of workers should be 1 or greater" + assert len(flows_files) == len(hucs), "Number of hucs must be equal to the number of forecasts provided" + assert not (bin_raster and depth_raster), "Cannot use both -b and -d flags" + + # Create output directory if it does not exist + if not os.path.isdir(output_dir): + os.mkdir(output_dir) + + # Create nested list for input into multi-threading + arg_list = [] + for huc, flows_file in zip(hucs, flows_files): + arg_list.append((fim_dir_ms, fim_dir_fr, huc, flows_file, output_dir, ouput_name, bin_raster, depth_raster, clean, quiet)) + + # Multi-thread for each huc in input hucs + if num_workers > 1: + from multiprocessing import Pool + with Pool(processes=num_workers) as pool: + # Run composite_inundation() + pool.starmap(composite_inundation, arg_list) + else: # run linear if jobs == 1 + for arg in arg_list: + composite_inundation(*arg) diff --git a/tests/preprocess/create_flow_forecast_file.py b/tools/create_flow_forecast_file.py old mode 100644 new mode 100755 similarity index 76% rename from tests/preprocess/create_flow_forecast_file.py rename to tools/create_flow_forecast_file.py index e7df52e15..bb8833343 --- a/tests/preprocess/create_flow_forecast_file.py +++ b/tools/create_flow_forecast_file.py @@ -1,8 +1,5 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed Jul 29 11:48:37 2020 -@author: Fernando Aristizabal with edits by Trevor Grout -""" +#!/usr/bin/env python3 + import os import geopandas as gpd import argparse @@ -19,54 +16,54 @@ def create_flow_forecast_file(ble_geodatabase, nwm_geodatabase, output_parent_di Path to nwm geodatabase. output_parent_dir : STRING Output parent directory of output. Flow files will be output to subdirectories within parent directory. - ble_xs_layer_name : STRING - The cross section layer in the ble geodatabase to be imported. Default is 'XS' (sometimes it is 'XS_1D') - ble_huc_layer_name : STRING + ble_xs_layer_name : STRING + The cross section layer in the ble geodatabase to be imported. Default is 'XS' (sometimes it is 'XS_1D') + ble_huc_layer_name : STRING The huc layer in the ble geodatabase. Default is 'S_HUC_Ar' (sometimes it is 'S_HUC_ar' ) - ble_huc_id_field : STRING + ble_huc_id_field : STRING The attribute field within the ble_huc_layer_name containing the huc code. Default is 'HUC_CODE'. Assumes only 1 unique code. - nwm_stream_layer_name : STRING + nwm_stream_layer_name : STRING The stream centerline layer name (or partial layer name) for the NWM geodatabase. Default is 'RouteLink_FL_2020_04_07'. - nwm_feature_id_field : STRING + nwm_feature_id_field : STRING The feature id of the nwm segments. Default is 'ID' (applicable if nwmv2.1 is used) Returns ------- None. ''' - #Read the ble xs layer into a geopandas dataframe. + # Read the ble xs layer into a geopandas dataframe. xs_layer = gpd.read_file(ble_geodatabase,layer = ble_xs_layer_name) - #Read ble huc layer into a geopandas dataframe and extract the huc code. By default it assumes only one HUC in the layer (typically always the case). 
+ # Read ble huc layer into a geopandas dataframe and extract the huc code. By default it assumes only one HUC in the layer (typically always the case). huc_layer = gpd.read_file(ble_geodatabase, layer = ble_huc_layer_name) [huc] = huc_layer[ble_huc_id_field].unique() - - #Read in the NWM stream layer into a geopandas dataframe using the bounding box option based on the extents of the BLE XS layer. + + # Read in the NWM stream layer into a geopandas dataframe using the bounding box option based on the extents of the BLE XS layer. nwm_river_layer = gpd.read_file(nwm_geodatabase, bbox = xs_layer, layer = nwm_stream_layer_name) - - #Make sure xs_layer is in same projection as nwm_river_layer. + + # Make sure xs_layer is in same projection as nwm_river_layer. xs_layer_proj = xs_layer.to_crs(nwm_river_layer.crs) - - #Perform an intersection of the BLE layers and the NWM layers, using the keep_geom_type set to False produces a point output. + + # Perform an intersection of the BLE layers and the NWM layers, using the keep_geom_type set to False produces a point output. intersection = gpd.overlay(xs_layer_proj, nwm_river_layer, how = 'intersection', keep_geom_type = False) - #Create the flow forecast files - #define fields containing flow (typically these won't change for BLE) + ## Create the flow forecast files + # Define fields containing flow (typically these won't change for BLE) flow_fields = ['E_Q_01PCT','E_Q_0_2PCT'] - #define return period associated with flow_fields (in same order as flow_fields). These will also serve as subdirectory names. + # Define return period associated with flow_fields (in same order as flow_fields). These will also serve as subdirectory names. return_period = ['100yr','500yr'] - #Conversion factor from CFS to CMS - dischargeMultiplier = 0.3048 ** 3 - - #Write individual flow csv files + # Conversion factor from CFS to CMS + dischargeMultiplier = 0.3048 ** 3 + + # Write individual flow csv files for i,flow in enumerate(flow_fields): - #Write dataframe with just ID and single flow event + # Write dataframe with just ID and single flow event forecast = intersection[[nwm_feature_id_field,flow]] - #Rename field names and re-define datatypes + # Rename field names and re-define datatypes forecast = forecast.rename(columns={nwm_feature_id_field :'feature_id',flow : 'discharge'}) forecast = forecast.astype({'feature_id' : int , 'discharge' : float}) @@ -74,18 +71,18 @@ def create_flow_forecast_file(ble_geodatabase, nwm_geodatabase, output_parent_di forecast = forecast.groupby('feature_id').median() forecast = forecast.reset_index(level=0) - #Convert CFS to CMS + # Convert CFS to CMS forecast['discharge'] = forecast['discharge'] * dischargeMultiplier - #Set paths and write file + # Set paths and write file output_dir = os.path.join(output_parent_dir, huc) dir_of_csv = os.path.join(output_dir,return_period[i]) os.makedirs(dir_of_csv,exist_ok = True) path_to_csv = os.path.join(dir_of_csv,"ble_huc_{}_flows_{}.csv".format(huc,return_period[i])) - forecast.to_csv(path_to_csv,index=False) - + forecast.to_csv(path_to_csv,index=False) + if __name__ == '__main__': - #Parse arguments + # Parse arguments parser = argparse.ArgumentParser(description = 'Produce forecast flow files from BLE datasets') parser.add_argument('-b', '--ble-geodatabase', help = 'BLE geodatabase (.gdb file extension). Will look for layer with "XS" in name. 
It is assumed the 100 year flow field is "E_Q_01PCT" and the 500 year flow field is "E_Q_0_2_PCT" as these are the default field names.', required = True) parser.add_argument('-n', '--nwm-geodatabase', help = 'NWM geodatabase (.gdb file extension).', required = True) @@ -95,9 +92,7 @@ def create_flow_forecast_file(ble_geodatabase, nwm_geodatabase, output_parent_di parser.add_argument('-huid', '--ble-huc-id-field', help = 'BLE id field in the ble-huc-layer-name. Default field is "HUC_CODE".', required = False, default = 'HUC_CODE') parser.add_argument('-l', '--nwm-stream-layer-name', help = 'NWM streams layer. Default layer is "RouteLink_FL_2020_04_07")', required = False, default = 'RouteLink_FL_2020_04_07') parser.add_argument('-f', '--nwm-feature-id-field', help = 'id field for nwm streams. Not required if NWM v2.1 is used (default id field is "ID")', required = False, default = 'ID') - #Extract to dictionary and assign to variables. + # Extract to dictionary and assign to variables. args = vars(parser.parse_args()) - #Run create_flow_forecast_file + # Run create_flow_forecast_file create_flow_forecast_file(**args) - - diff --git a/tools/eval_alt_catfim.py b/tools/eval_alt_catfim.py new file mode 100644 index 000000000..ffe86cf0e --- /dev/null +++ b/tools/eval_alt_catfim.py @@ -0,0 +1,221 @@ + +import os +import argparse +from multiprocessing import Pool +import csv +import json + +from tools_shared_variables import TEST_CASES_DIR +from tools_shared_functions import compute_contingency_stats_from_rasters + + +def create_master_metrics_csv_alt(master_metrics_csv_output, json_list, version): + + # Construct header + metrics_to_write = ['true_negatives_count', + 'false_negatives_count', + 'true_positives_count', + 'false_positives_count', + 'contingency_tot_count', + 'cell_area_m2', + 'TP_area_km2', + 'FP_area_km2', + 'TN_area_km2', + 'FN_area_km2', + 'contingency_tot_area_km2', + 'predPositive_area_km2', + 'predNegative_area_km2', + 'obsPositive_area_km2', + 'obsNegative_area_km2', + 'positiveDiff_area_km2', + 'CSI', + 'FAR', + 'TPR', + 'TNR', + 'PPV', + 'NPV', + 'ACC', + 'Bal_ACC', + 'MCC', + 'EQUITABLE_THREAT_SCORE', + 'PREVALENCE', + 'BIAS', + 'F1_SCORE', + 'TP_perc', + 'FP_perc', + 'TN_perc', + 'FN_perc', + 'predPositive_perc', + 'predNegative_perc', + 'obsPositive_perc', + 'obsNegative_perc', + 'positiveDiff_perc', + 'masked_count', + 'masked_perc', + 'masked_area_km2' + ] + + additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] + list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source'] + ['extent_config'] + ["calibrated"]] + + + + for full_json_path in json_list: + + # Parse variables from json path. 
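+        # Stats json paths are built in __main__ below as <nws_lid>_<huc>_<benchmark_type>_<magnitude>.json,
+        # so splitting the filename on '_' yields [nws_lid, huc, benchmark_type, magnitude].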
+ split_json_handle = os.path.split(full_json_path)[1].split('_') + + benchmark_source = split_json_handle[2] + huc = split_json_handle[1] + nws_lid = split_json_handle[0] + magnitude = split_json_handle[3].replace('.json', '') + + real_json_path = os.path.join(os.path.split(full_json_path)[0], nws_lid + '_b0m_stats.json') + + sub_list_to_append = [version, nws_lid, magnitude, huc] + + stats_dict = json.load(open(real_json_path)) + for metric in metrics_to_write: + sub_list_to_append.append(stats_dict[metric]) + sub_list_to_append.append(real_json_path) + sub_list_to_append.append('NA') + sub_list_to_append.append(benchmark_source) + sub_list_to_append.append('MS') + sub_list_to_append.append('yes') + + list_to_write.append(sub_list_to_append) + + + with open(master_metrics_csv_output, 'w', newline='') as csvfile: + csv_writer = csv.writer(csvfile) + csv_writer.writerows(list_to_write) + + +def process_alt_comparison(args): + + predicted_raster_path = args[0] + benchmark_raster_path = args[1] + agreement_raster = args[2] + stats_csv = args[3] + stats_json = args[4] + mask_values = args[5] + stats_modes_list = args[6] + test_id = args[7] + mask_dict = args[8] + + compute_contingency_stats_from_rasters(predicted_raster_path, + benchmark_raster_path, + agreement_raster, + stats_csv=stats_csv, + stats_json=stats_json, + mask_values=[], + stats_modes_list=stats_modes_list, + test_id=test_id, + mask_dict=mask_dict, + ) + + print("Finished processing " + agreement_raster) + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Produces metrics for alternative CatFIM.') + parser.add_argument('-d','--catfim-directory',help='Path to directory storing CatFIM outputs. This is the most parent dir, usually named by a version.',required=True) + parser.add_argument('-w','--output-workspace',help='Add a special name to the end of the branch.',required=True, default="") + parser.add_argument('-m','--master-metrics-csv',help='Define path for master metrics CSV file.',required=False,default=None) + parser.add_argument('-j','--job-number',help='Number of processes to use. Default is 1.',required=False, default="1") + + # Assign variables from arguments. + args = vars(parser.parse_args()) + catfim_dir = args['catfim_directory'] + output_workspace = args['output_workspace'] + job_number = int(args['job_number']) + master_metrics_csv = args['master_metrics_csv'] + + if master_metrics_csv == None: + master_metrics_csv = os.path.join(output_workspace, 'master_metrics.csv') + + if not os.path.exists(catfim_dir): + print("CatFIM directory: " + catfim_dir + " does not exist.") + quit + + if not os.path.exists(output_workspace): + os.mkdir(output_workspace) + + catfim_dir_list = os.listdir(catfim_dir) + + procs_list = [] + json_list = [] + + for huc in catfim_dir_list: + if len(huc) == 8: + + huc_workspace = os.path.join(output_workspace, huc) + if not os.path.exists(huc_workspace): + os.mkdir(huc_workspace) + + huc_dir_path = os.path.join(catfim_dir, huc) + + # List AHPS sites. + site_list = os.listdir(huc_dir_path) + + # Loop through AHPS sites. + for site in site_list: + site_dir = os.path.join(huc_dir_path, site) + + site_workspace = os.path.join(huc_workspace, site) + if not os.path.exists(site_workspace): + os.mkdir(site_workspace) + + for category in ['action', 'minor', 'moderate', 'major']: + # Presumptiously define inundation grid path. 
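+                    # i.e. assume CatFIM wrote one '<site>_<category>_extent_<huc>.tif' grid per
+                    # category; categories without a matching grid are simply skipped below.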
+ category_grid_path = os.path.join(site_dir, site + '_' + category + '_extent_' + huc + '.tif') + + if os.path.exists(category_grid_path): + + site_category_workspace = os.path.join(site_workspace, category) + if not os.path.exists(site_category_workspace): + os.mkdir(site_category_workspace) + + # Map path to benchmark data, both NWS and USGS. + for benchmark_type in ['nws', 'usgs']: + benchmark_grid = os.path.join(TEST_CASES_DIR, benchmark_type + '_test_cases', 'validation_data_' + benchmark_type, huc, site, category, 'ahps_' + site + '_huc_' + huc + '_extent_' + category + '.tif') + + if os.path.exists(benchmark_grid): + + # Create dir in output workspace for results. + file_handle = site + '_' + huc + '_' + benchmark_type + '_' + category + + predicted_raster_path = category_grid_path + benchmark_raster_path = benchmark_grid + agreement_raster = os.path.join(site_category_workspace, file_handle + '.tif') + stats_csv = os.path.join(site_category_workspace, file_handle + '.csv') + stats_json = os.path.join(site_category_workspace, file_handle + '.json') + mask_values=None + stats_modes_list=['total_area'] + test_id='' + mask_dict={'levees': {'path': '/data/test_cases/other/zones/leveed_areas_conus.shp', 'buffer': None, 'operation': 'exclude'}, + 'waterbodies': {'path': '/data/test_cases/other/zones/nwm_v2_reservoirs.shp', 'buffer': None, 'operation': 'exclude'}, + site: {'path': '/data/test_cases/{benchmark_type}_test_cases/validation_data_{benchmark_type}/{huc}/{site}/{site}_domain.shp'.format(benchmark_type=benchmark_type, site=site, category=category, huc=huc), 'buffer': None, 'operation': 'include'}} + + json_list.append(stats_json) + + # Either add to list to multiprocess or process serially, depending on user specification. + if job_number > 1: + procs_list.append([predicted_raster_path, benchmark_raster_path, agreement_raster,stats_csv,stats_json,mask_values,stats_modes_list,test_id, mask_dict]) + else: + process_alt_comparison([predicted_raster_path, benchmark_raster_path, agreement_raster,stats_csv,stats_json, mask_values,stats_modes_list,test_id, mask_dict]) + + # Multiprocess. + if job_number > 1: + with Pool(processes=job_number) as pool: + pool.map(process_alt_comparison, procs_list) + + # Merge stats into single file. + version = os.path.split(output_workspace)[1] + create_master_metrics_csv_alt(master_metrics_csv, json_list, version) + + + + \ No newline at end of file diff --git a/tools/eval_plots.py b/tools/eval_plots.py new file mode 100644 index 000000000..6c2dcf00f --- /dev/null +++ b/tools/eval_plots.py @@ -0,0 +1,679 @@ +#!/usr/bin/env python3 +import pandas as pd +from pathlib import Path +import argparse +from natsort import natsorted +import geopandas as gpd +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +import re +import os +import sys +sys.path.append('/foss_fim/src') +from utils.shared_variables import VIZ_PROJECTION +from dotenv import load_dotenv +from tools_shared_functions import aggregate_wbd_hucs, get_metadata +from tools_shared_variables import BAD_SITES, DISCARD_AHPS_QUERY + +#Get variables from .env file. 
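+# The .env file is expected to define both variables read below, e.g. (values are placeholders):
+#   API_BASE_URL=<root URL of the WRDS API used by get_metadata()>
+#   WBD_LAYER=<path to the WBD HUC layer passed to aggregate_wbd_hucs()>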
+load_dotenv() +WBD_LAYER = os.getenv("WBD_LAYER") +API_BASE_URL = os.getenv("API_BASE_URL") + +######################################################################### +#Create boxplot +######################################################################### +def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, dest_file = False): + ''' + Create boxplots. + + Parameters + ---------- + dataframe : DataFrame + Pandas dataframe data to be plotted. + x_field : STR + Field to use for x-axis + x_order : List + Order to arrange the x-axis. + y_field : STR + Field to use for the y-axis + hue_field : STR + Field to use for hue (typically FIM version) + title_text : STR + Text for plot title. + fim_configuration: STR + Configuration of FIM (FR or MS or Composite). + simplify_legend : BOOL, optional + If True, it will simplify legend to FIM 1, FIM 2, FIM 3. + The default is False. + dest_file : STR or BOOL, optional + If STR provide the full path to the figure to be saved. If False + no plot is saved to disk. The default is False. + + Returns + ------- + fig : MATPLOTLIB + Plot. + + ''' + + #initialize plot + fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) + #Use seaborn to plot the boxplot + axes=sns.boxplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright') + #set title of plot + axes.set_title(f'{title_text} ({y_field})',fontsize=20, weight = 'bold') + #Set yticks and background horizontal line. + axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) + for index,ytick in enumerate(axes.get_yticks()): + plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1) + #Define y axis label and x axis label. + axes.set_ylabel(f'{y_field}',fontsize='xx-large',weight = 'bold') + axes.set_xlabel('',fontsize=0,weight = 'bold') + #Set sizes of ticks and legend. + axes.tick_params(labelsize = 'xx-large') + axes.legend(markerscale = 2, fontsize =20, loc = 'lower left') + + #If simple legend desired + if simplify_legend: + #trim labels to FIM 1, FIM 2, and the FIM 3 version + handles, org_labels = axes.get_legend_handles_labels() + label_dict = {} + for label in org_labels: + if 'fim_1' in label: + label_dict[label] = 'FIM 1' + elif 'fim_2' in label: + label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() + elif 'fim_3' in label and len(label) < 20: + label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() + if label.endswith('_c'): + label_dict[label] = label_dict[label] + ' c' + else: + label_dict[label] = label + #Define simplified labels as a list. + new_labels = [label_dict[label] for label in org_labels] + #Define legend location. FAR needs to be in different location than CSI/POD. + if y_field == 'FAR': + legend_location = 'upper right' + else: + legend_location = 'lower left' + #rename legend labels to the simplified labels. 
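+        # e.g. with simplify_legend=True a version label such as 'fim_3_0_0_3_fr_c' becomes
+        # 'FIM 3.0.0.3 fr c' (the 'fr' comes from fim_configuration, and labels ending in
+        # '_c' keep a trailing 'c').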
+        axes.legend(handles, new_labels, markerscale = 2, fontsize = 14, loc = legend_location, ncol = int(np.ceil(len(new_labels)/7)))
+    #Print textbox if supplied
+    if textbox_str:
+        box_props = dict(boxstyle='round', facecolor='white', alpha=0.5)
+        axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=14, verticalalignment='top', bbox=box_props)
+
+    #If figure to be saved to disk, then do so, otherwise return figure
+    if dest_file:
+        fig.savefig(dest_file)
+        plt.close(fig)
+    else:
+        return fig
+
+#########################################################################
+#Create scatter plot
+#########################################################################
+def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annotate = False, dest_file = False):
+    '''
+    Create scatter plots.
+
+    Parameters
+    ----------
+    dataframe : DataFrame
+        Pandas dataframe data to be plotted.
+    x_field : STR
+        Field to use for x-axis (Assumes FIM 2)
+    y_field : STR
+        Field to use for the y-axis (Assumes FIM 3)
+    title_text : STR
+        Text for plot title.
+    stats_text : STR or BOOL
+        Text for stats to place on chart. Default is False (no stats printed).
+    dest_file : STR or BOOL, optional
+        If STR provide the full path to the figure to be saved. If False
+        no plot is saved to disk. The default is False.
+
+    Returns
+    -------
+    fig : MATPLOTLIB
+        Plot.
+
+    '''
+
+    #initialize plot
+    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))
+
+    #Use seaborn to plot the scatterplot
+    axes=sns.scatterplot(data=dataframe, x=x_field, y=y_field, color = 'black', s = 150)
+
+    #Set xticks and yticks and background horizontal line.
+    axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1))
+    axes.set(xlim=(0.0,1.0),xticks = np.arange(0,1.1,0.1))
+    axes.grid(b=True, which='major', axis='both')
+
+    #Set sizes of ticks and legend.
+    axes.tick_params(labelsize = 'xx-large')
+
+    #Define y axis label and x axis label.
+ axes.set_ylabel(f'{y_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold') + axes.set_xlabel(f'{x_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold') + + #Plot diagonal line + diag_range = [0,1] + axes.plot(diag_range, diag_range, color='gray', transform=axes.transAxes) + + + #set title of plot + axes.set_title(f'{title_text}',fontsize=20, weight = 'bold') + + if annotate: + #Set text for labels + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) + textbox_str = 'Target Better' + axes.text(0.3, 0.6, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor') + textbox_str = 'Baseline Better' + axes.text(0.5, 0.2, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor') + + if stats_text: + #Add statistics textbox + axes.text(0.01, 0.80, stats_text, transform=axes.transAxes, fontsize=24, verticalalignment='top', bbox=box_props) + + #If figure to be saved to disk, then do so, otherwise return fig + if dest_file: + fig.savefig(dest_file) + plt.close(fig) + else: + return fig +######################################################################### +#Create barplot +######################################################################### +def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, display_values = False, dest_file = False): + ''' + Create barplots. + + Parameters + ---------- + dataframe : DataFrame + Pandas dataframe data to be plotted. + x_field : STR + Field to use for x-axis + x_order : List + Order to arrange the x-axis. + y_field : STR + Field to use for the y-axis + hue_field : STR + Field to use for hue (typically FIM version) + title_text : STR + Text for plot title. + fim_configuration: STR + Configuration of FIM (FR or MS or Composite). + simplify_legend : BOOL, optional + If True, it will simplify legend to FIM 1, FIM 2, FIM 3. + Default is False. + display_values : BOOL, optional + If True, Y values will be displayed above bars. + Default is False. + dest_file : STR or BOOL, optional + If STR provide the full path to the figure to be saved. If False + no plot is saved to disk. Default is False. + + Returns + ------- + fig : MATPLOTLIB + Plot. + + ''' + + #initialize plot + fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) + #Use seaborn to plot the boxplot + axes=sns.barplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright') + #set title of plot + axes.set_title(f'{title_text}',fontsize=20, weight = 'bold') + #Set yticks and background horizontal line. + axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) + for index,ytick in enumerate(axes.get_yticks()): + plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1) + #Define y axis label and x axis label. + axes.set_ylabel(f'{y_field.upper()}',fontsize='xx-large',weight = 'bold') + axes.set_xlabel('',fontsize=0,weight = 'bold') + #Set sizes of ticks and legend. 
+ axes.tick_params(labelsize = 'xx-large') + axes.legend(markerscale = 2, fontsize =20, loc = 'upper right') + #If simple legend desired + if simplify_legend: + #trim labels to FIM 1, FIM 2, FIM 3 + handles, org_labels = axes.get_legend_handles_labels() + label_dict = {} + for label in org_labels: + if 'fim_1' in label: + label_dict[label] = 'FIM 1' + elif 'fim_2' in label: + label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() + elif 'fim_3' in label and len(label) < 20: + label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() + if label.endswith('_c'): + label_dict[label] = label_dict[label] + ' c' + else: + label_dict[label] = label + #Define simplified labels as a list. + new_labels = [label_dict[label] for label in org_labels] + #rename legend labels to the simplified labels. + axes.legend(handles, new_labels, markerscale = 2, fontsize = 14, loc = 'upper right', ncol = int(np.ceil(len(new_labels)/7))) + #Add Textbox + if textbox_str: + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) + axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=18, verticalalignment='top', bbox=box_props) + + #Display Y values above bars + if display_values: + #Add values of bars directly above bar. + for patch in axes.patches: + value = round(patch.get_height(),3) + axes.text(patch.get_x()+patch.get_width()/2., + patch.get_height(), + '{:1.3f}'.format(value), + ha="center", fontsize=18) + + #If figure to be saved to disk, then do so, otherwise return fig + if dest_file: + fig.savefig(dest_file) + plt.close(fig) + else: + return fig +####################################################################### +#Filter dataframe generated from csv file from run_test_case aggregation +######################################################################## +def filter_dataframe(dataframe, unique_field): + ''' + + This script will filter out the sites (or hucs) which are not consistently + found for all versions for a given magnitude. For example, an AHPS + lid site must have output for all 3 versions (fim1, fim2, fim3) for + a given magnitude (eg action) otherwise that lid is filtered out. + Likewise for a BLE a huc must have output for all 3 versions + (fim1, fim2, fim3) for a given magnitude (eg 100yr) otherwise it is + filtered out. + + Parameters + ---------- + dataframe : Pandas DataFrame + Containing the input metrics originating from synthesize_test_cases + unique_field : STR + base resolution for each benchmark source: 'nws'/'usgs' (nws_lid) + ble (huc). + + Returns + ------- + final_filtered_dataframe : Pandas Dataframe + Filtered dataframe that contains only common sites (lids or hucs) between versions for each magnitude. For example, for AHPS all sites which were run for each version for a given magnitude will be kept or for ble, all hucs which ran for all versions for a given magnitude. + unique_sites: DICT + The sites that were included in the dataframe for each magnitude. + + ''' + + #Get lists of sites for each magnitude/version + unique_sites = dataframe.groupby(['magnitude','version'])[unique_field].agg('unique') + #Get unique magnitudes + magnitudes = dataframe.magnitude.unique() + #Create new dataframe to hold metrics for the common sites as well as the actual lists of common sites. 
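+    # e.g. if for the 'action' magnitude fim_1 has sites {A, B, C}, fim_2 has {A, B} and
+    # fim_3 has {A, B, D}, only sites A and B are retained for that magnitude; the common
+    # sites are re-derived independently for every magnitude.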
+ final_filtered_dataframe = pd.DataFrame() + all_unique_sites = {} + #Cycle through each magnitude + for magnitude in magnitudes: + #Compile a list of sets containing unique lids pertaining to each threshold. List contains 3 unique sets [{fim1:unique lids},{fim2: unique lids},{fim3: unique lids}] + sites_per_magnitude=[set(a) for a in unique_sites[magnitude]] + #Intersect the sets to get the common lids per threshold then convert to list. + common_sites_per_magnitude = list(set.intersection(*sites_per_magnitude)) + #Write common sites to dataframe + all_unique_sites[magnitude] = common_sites_per_magnitude + #Query filtered dataframe and only include data associated with the common sites for that magnitude + filtered_common_sites = dataframe.query(f'magnitude == "{magnitude}" & {unique_field} in @common_sites_per_magnitude') + #Append the data for each magnitude to a final dataframe that will contain data for all common sites for all magnitudes. + final_filtered_dataframe = final_filtered_dataframe.append(filtered_common_sites, ignore_index = True) + + return final_filtered_dataframe, all_unique_sites +############################################################################## +############################################################################## +#Main function to analyze metric csv. +############################################################################## +def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , spatial = False, fim_1_ms = False, site_barplots = False): + + ''' + Creates plots and summary statistics using metrics compiled from + synthesize_test_cases. Required inputs are metrics_csv and workspace. + Outputs include: + aggregate__.csv: this csv + contains the aggregated total statistics (i.e. CSI, FAR, POD) + using the summed area_sq_km fields + __common_sites.csv: this csv + contains the unique sites (e.g usgs/nws: nws_lid; ble: huc08) + considered for aggregation/plots for each magnitude. The selected + sites occur in all versions analyzed. For example, if FIM 1, + FIM 2, FIM 3.0.0.3 were versions analyzed, the common sites + would be those that had data for ALL versions. This + analysis is then redone for each magnitude. As such, the number + of sites may vary with magnitude. The number of sites for each + magnitude is annotated on generated plots. + __analyzed_data.csv: this is the + dataset used to create plots and aggregate statistics. It is + a subset of the input metrics file and consists of the common + sites. + csi_aggr__.png: bar plot of the + aggregated CSI scores. Number of common sites is annotated + (see list of sites listed in *_*_common_sites.csv). + csi__.png: box plot of CSI scores + (sites weighted equally). Number of common sites is annotated + (see list of sites listed in *_*_common_sites.csv). + far__*.png: box plot of FAR scores + (sites weighted equally). Number of common sites is annotated + (see list of sites listed in *_*_common_sites.csv). + tpr__*.png: box plot of TPR/POD + scores (sites weighted equally). Number of common sites is + annotated (see list of sites listed in *_*_common_sites.csv). + csi_scatter__*.png: scatter plot comparing + two versions for a given magnitude. This is only generated if + there are exactly two versions analyzed. + csi_scatter___data.csv: data used to create the + csi_scatter_plot + Optional: 'individual' directory with subfolders for each site in analysis. 
In these + site subdirectories are the following files: + csi___.png: A barplot + of CSI for each version for all magnitudes for the site. + Optional (if spatial argument supplied): + fim_performance_points.shp -- A shapefile of ahps points with + metrics contained in attribute table. + fim_performance_polys.shp -- A shapefile of huc8 polygons with + metrics contained in attribute table. + + + + Parameters + ---------- + metrics_csv : STRING + Path to csv produced as part of synthesize_test_cases containing + all metrics across all versions. + workspace : STRING + Path to the output workspace. Subdirectories will be created + reflecting the evaluation datasets. + versions: LIST + A list of versions to be aggregated/plotted. Uses the "startswith" + approach. Versions should be supplied in the order they are to + be plotted. For example: ['fim_', 'fb']; This will evaluate all + versions that start with fim_ (e.g. fim_1, fim_2, fim_3) and any + feature branch that starts with "fb". To esbalish version order, + the fim versions are naturally sorted and then fb versions + (naturally sorted) are appended. These versions are also used to + filter the input metric csv as only these versions are retained + for analysis. + stats: LIST + A list of statistics to be plotted. Must be identical to column + field in metrics_csv. CSI, POD, TPR are currently calculated, if + additional statistics are desired formulas would need to be coded. + spatial : BOOL, optional + Creates spatial datasets of the base unit (ble: huc polygon, ahps: point) + with metrics contained in attribute tables. The geospatial data is + either supplied in the .env file (WBD Huc layer) or from WRDS (ahps). + The outputs are consistent with requirements set forth by the vizualization team. + Additionally, there is a commented out section where if the user + passes the extent files generated during creation of nws/usgs ahps + preprocessing, the actual maps and flows used for evaluation are + appended to the ahps shapefile output. + fim_1_ms: BOOL + Default is false. If True then fim_1 rows are duplicated with + extent_config set to MS. This allows for FIM 1 to be included + in MS plots/stats (helpful for nws/usgs ahps comparisons). + site_barplots: BOOL + Default is false. If True then barplots for each individual site are + created. An 'individual' directory with subdirectories of each site + are created and the plot is located in each site subdirectory. + + Returns + ------- + all_datasets : DICT + Dictionary containing all datasets generated. + Keys: (benchmark_source, extent_config), + Values: (filtered dataframe, common sites) + + ''' + + # Import metrics csv as DataFrame and initialize all_datasets dictionary + csv_df = pd.read_csv(metrics_csv, dtype = {'huc':str}) + + # fim_1_ms flag enables FIM 1 to be shown on MS plots/stats + if fim_1_ms: + #Query FIM 1 rows based on version beginning with "fim_1" + fim_1_rows = csv_df.query('version.str.startswith("fim_1")').copy() + #Set extent configuration to MS (instead of FR) + fim_1_rows['extent_config'] = 'MS' + #Append duplicate FIM 1 rows to original dataframe + csv_df = csv_df.append(fim_1_rows, ignore_index = True) + + # If versions are supplied then filter out + if versions: + #Filter out versions based on supplied version list + metrics = csv_df.query('version.str.startswith(tuple(@versions))') + else: + metrics = csv_df + + # Group by benchmark source + benchmark_by_source = metrics.groupby(['benchmark_source', 'extent_config']) + + ''' Iterate through benchmark_by_source. 
Pre-filter metrics dataframe + as needed (e.g. usgs/nws filter query). Then further filtering to + discard all hucs/nws_lid that are not present across all analyzed + versions for a given magnitude. The final filtered dataset is written + to a dictionary with the key (benchmark source, extent config) + and values (filtered dataframe, common sites). ''' + + all_datasets = {} + for (benchmark_source, extent_configuration), benchmark_metrics in benchmark_by_source: + + '''If source is usgs/nws define the base resolution and query + (use alternate query if passed). Append filtered datasets to + all_datasets dictionary.''' + + if benchmark_source in ['usgs','nws']: + + # Set the base processing unit for the ahps runs. + base_resolution = 'nws_lid' + + # Filter the dataset based on query (IMPORTED FROM TOOLS_SHARED_VARIABLES.py) + ahps_metrics = benchmark_metrics.query(DISCARD_AHPS_QUERY) + + # Filter out all instances where the base_resolution doesn't exist across all desired fim versions for a given magnitude + all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(ahps_metrics, base_resolution) + + # If source is 'ble', set base_resolution and append ble dataset to all_datasets dictionary + elif benchmark_source in ['ble', 'ifc']: + + # Set the base processing unit for ble runs + base_resolution = 'huc' + + # Filter out all instances where base_resolution doesn't exist across all desired fim versions for a given magnitude + all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(benchmark_metrics, base_resolution) + + # For each dataset in all_datasets, generate plots and aggregate statistics + for (dataset_name,configuration), (dataset, sites) in all_datasets.items(): + + # Define and create the output workspace as a subfolder within the supplied workspace + output_workspace = Path(workspace) / dataset_name / configuration.lower() + output_workspace.mkdir(parents = True, exist_ok = True) + + # Write out the filtered dataset and common sites to file + dataset.to_csv(output_workspace / (f'{dataset_name}_{configuration.lower()}_analyzed_data.csv'), index = False) + sites_pd = pd.DataFrame.from_dict(sites, orient = 'index').transpose() + sites_pd.to_csv(output_workspace / (f'{dataset_name}_{configuration.lower()}_common_sites.csv'), index = False) + + # Set the order of the magnitudes and define base resolution + if dataset_name == 'ble': + magnitude_order = ['100yr', '500yr'] + base_resolution = 'huc' + elif dataset_name == 'ifc': + magnitude_order = ['2yr','5yr','10yr','25yr','50yr','100yr','200yr','500yr'] + base_resolution = 'huc' + elif dataset_name in ['usgs','nws']: + magnitude_order = ['action','minor','moderate','major'] + base_resolution = 'nws_lid' + + # Calculate aggregated metrics based on total_sq_km fields + dataset_sums = dataset.groupby(['version', 'magnitude'])[['TP_area_km2','FP_area_km2','FN_area_km2']].sum() + dataset_sums['csi'] = dataset_sums['TP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FP_area_km2'] + dataset_sums['FN_area_km2']) + dataset_sums['far'] = dataset_sums['FP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FP_area_km2']) + dataset_sums['pod'] = dataset_sums['TP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FN_area_km2']) + dataset_sums = dataset_sums.reset_index() + + # Write aggregated metrics to file + dataset_sums.to_csv(output_workspace / f'aggregate_{dataset_name}_{configuration.lower()}.csv', index = False ) + + ## This section naturally orders analyzed versions which defines the hue 
order for the generated plots + # Get all versions in dataset + all_versions = list(dataset.version.unique()) + version_order = [] + + # If versions are not specified then use all available versions and assign to versions_list + if not versions: + versions_list = all_versions + # If versions are supplied assign to versions_list + else: + versions_list = versions + # For each version supplied by the user + for version in versions_list: + #Select all the versions that start with the supplied version. + selected_versions = [sel_version for sel_version in all_versions if sel_version.startswith(version)] + #Naturally sort selected_versions + selected_versions = natsorted(selected_versions) + #Populate version order based on the sorted subsets. + version_order.extend(selected_versions) + + # Define textbox which will contain the counts of each magnitude + textbox = [] + for magnitude in sites: + count = len(sites[magnitude]) + line_text = f'{magnitude.title()} Sites = {count}' + textbox.append(line_text) + textbox = '\n'.join(textbox) + + # Create aggregate barplot + aggregate_file = output_workspace / (f'csi_aggr_{dataset_name}_{configuration.lower()}.png') + barplot(dataframe = dataset_sums, x_field = 'magnitude', x_order = magnitude_order, y_field = 'csi', hue_field = 'version', ordered_hue = version_order, title_text = f'Aggregate {dataset_name.upper()} FIM Scores', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = aggregate_file) + + #If enabled, write out barplots of CSI for individual sites. + if site_barplots: + individual_dirs = output_workspace / 'individual' + individual_dirs.mkdir(parents = True, exist_ok = True) + subset = dataset.groupby(base_resolution) + for site_name, site_data in subset: + site_file = individual_dirs / f'csi_{str(site_name)}_{dataset_name}_{configuration.lower()}.png' + barplot(dataframe = site_data, x_field = 'magnitude', x_order = magnitude_order, y_field = 'CSI', hue_field = 'version', ordered_hue = version_order, title_text = f'{str(site_name).upper()} FIM Scores', fim_configuration = configuration, textbox_str = False, simplify_legend = True, dest_file = site_file) + + # Create box plots for each metric in supplied stats + for stat in stats: + output_file = output_workspace / (f'{stat.lower()}_{dataset_name}_{configuration.lower()}.png') + boxplot(dataframe = dataset, x_field = 'magnitude', x_order = magnitude_order, y_field = stat, hue_field = 'version', ordered_hue = version_order, title_text = f'{dataset_name.upper()} FIM Sites', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = output_file) + + # Get the last 2 versions from the version order for scatter plot + if len(version_order) == 2: + x_version, y_version = version_order + for magnitude in magnitude_order: + # Scatterplot comparison between last 2 versions + x_csi = dataset.query(f'version == "{x_version}" & magnitude == "{magnitude}"')[[base_resolution, 'CSI']] + y_csi = dataset.query(f'version == "{y_version}" & magnitude == "{magnitude}"')[[base_resolution, 'CSI']] + plotdf = pd.merge(x_csi, y_csi, on = base_resolution, suffixes = (f"_{x_version}",f"_{y_version}")) + # Define arguments for scatterplot function + title_text = f'CSI {magnitude}' + dest_file = output_workspace / f'csi_scatter_{magnitude}_{configuration.lower()}.png' + scatterplot(dataframe = plotdf, x_field = f'CSI_{x_version}', y_field = f'CSI_{y_version}', title_text = title_text, annotate = False, dest_file = dest_file) + #Write out 
dataframe used to create scatter plots + plotdf['Diff (C-B)'] = plotdf[f'CSI_{y_version}'] - plotdf[f'CSI_{x_version}'] + plotdf.to_csv(output_workspace / f'csi_scatter_{magnitude}_{configuration.lower()}_data.csv', index = False) + + ####################################################################### + #Create spatial layers with threshold and mapping information + ######################################################################## + if spatial: + ############################################################### + #This section will join ahps metrics to a spatial point layer + ############################################################### + if all_datasets.get(('nws','MS')) and all_datasets.get(('usgs','MS')): + #Get point data for ahps sites + #Get metrics for usgs and nws benchmark sources + usgs_dataset,sites = all_datasets.get(('usgs','MS')) + nws_dataset, sites = all_datasets.get(('nws','MS')) + #Append usgs/nws dataframes and filter unnecessary columns and rename remaining. + all_ahps_datasets = usgs_dataset.append(nws_dataset) + all_ahps_datasets = all_ahps_datasets.filter(['huc','nws_lid','version','magnitude','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source']) + all_ahps_datasets.rename(columns = {'benchmark_source':'source'}, inplace = True) + + #Get spatial data from WRDS + #Get metadata from WRDS API + select_by = 'nws_lid' + selector = list(all_ahps_datasets.nws_lid.unique()) + metadata_url = f'{API_BASE_URL}/metadata' + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector) + #Create geospatial data from WRDS output + dictionary, gdf = aggregate_wbd_hucs(metadata_list, Path(WBD_LAYER), retain_attributes = True) + #Trim out unecessary columns and rename remaining columns + gdf = gdf.filter(['identifiers_nws_lid', 'nws_data_name', 'identifiers_nwm_feature_id','nws_data_wfo','nws_data_state','nws_data_county','geometry']) + gdf.rename(columns = {'identifiers_nws_lid':'nws_lid', 'nws_data_name':'lid_name','identifiers_nwm_feature_id':'feature_id','nws_data_wfo':'wfo','nws_data_state':'state','nws_data_county':'county','HUC8':'huc8'}, inplace = True) + + #Join spatial data to metric data + gdf['nws_lid'] = gdf['nws_lid'].str.lower() + joined = gdf.merge(all_ahps_datasets, on = 'nws_lid') + #Project to VIZ projection and write to file + joined = joined.to_crs(VIZ_PROJECTION) + joined.to_file(Path(workspace) / 'fim_performance_points.shp') + else: + print('NWS/USGS MS datasets not analyzed, no spatial data created.\nTo produce spatial data analyze a MS version.') + + ################################################################ + #This section joins ble (FR) metrics to a spatial layer of HUCs. + ################################################################ + if all_datasets.get(('ble','FR')) and all_datasets.get(('ifc','FR')): + #Select BLE, FR dataset. 
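# Aside: a minimal sketch (not part of the patch) of how the fim_performance_points.shp written
# above could be consumed. The column names mirror the filter/rename calls above; the workspace
# path and version string are hypothetical examples.
import geopandas as gpd

points = gpd.read_file('/data/plots/fim_performance_points.shp')   # hypothetical output workspace
subset = points.query('version == "fim_3_0_0_0" & magnitude == "moderate"')[['nws_lid', 'CSI']]
print(subset.head())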
+ ble_dataset, sites = all_datasets.get(('ble','FR')) + ifc_dataset, sites = all_datasets.get(('ifc','FR')) + huc_datasets = ble_dataset.append(ifc_dataset) + #Read in HUC spatial layer + wbd_gdf = gpd.read_file(Path(WBD_LAYER), layer = 'WBDHU8') + #Join metrics to HUC spatial layer + wbd_with_metrics = wbd_gdf.merge(huc_datasets, how = 'inner', left_on = 'HUC8', right_on = 'huc') + #Filter out unnecessary columns + wbd_with_metrics = wbd_with_metrics.filter(['version','magnitude','huc','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source','geometry']) + wbd_with_metrics.rename(columns = {'benchmark_source':'source'}, inplace = True ) + #Project to VIZ projection + wbd_with_metrics = wbd_with_metrics.to_crs(VIZ_PROJECTION) + #Write out to file + wbd_with_metrics.to_file(Path(workspace) / 'fim_performance_polys.shp') + else: + print('BLE/IFC FR datasets not analyzed, no spatial data created.\nTo produce spatial data analyze a FR version') +####################################################################### +if __name__ == '__main__': + # Parse arguments + parser = argparse.ArgumentParser(description = f'Plot and aggregate statistics for benchmark datasets (BLE/AHPS libraries)') + parser.add_argument('-m','--metrics_csv', help = 'Metrics csv created from synthesize test cases.', required = True) + parser.add_argument('-w', '--workspace', help = 'Output workspace', required = True) + parser.add_argument('-v', '--versions', help = 'List of versions to be plotted/aggregated. Versions are filtered using the "startswith" approach. For example, ["fim_","fb1"] would retain all versions that began with "fim_" (e.g. fim_1..., fim_2..., fim_3...) as well as any feature branch that began with "fb". An other example ["fim_3","fb"] would result in all fim_3 versions being plotted along with the fb.', nargs = '+', default = []) + parser.add_argument('-s', '--stats', help = 'List of statistics (abbrev to 3 letters) to be plotted/aggregated', nargs = '+', default = ['CSI','TPR','FAR'], required = False) + parser.add_argument('-sp', '--spatial', help = 'If enabled, creates spatial layers with metrics populated in attribute table.', action = 'store_true', required = False) + parser.add_argument('-f', '--fim_1_ms', help = 'If enabled fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False) + parser.add_argument('-i', '--site_plots', help = 'If enabled individual barplots for each site are created.', action = 'store_true', required = False) + + # Extract to dictionary and assign to variables + args = vars(parser.parse_args()) + + # Finalize Variables + m = args['metrics_csv'] + w = args['workspace'] + v = args['versions'] + s = args['stats'] + sp= args['spatial'] + f = args['fim_1_ms'] + i = args['site_plots'] + + # Run eval_plots function + print('The following AHPS sites are considered "BAD_SITES": ' + ', '.join(BAD_SITES)) + print('The following query is used to filter AHPS: ' + DISCARD_AHPS_QUERY) + eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, spatial = sp, fim_1_ms = f, site_barplots = i) diff --git a/tools/fim_completion_check.py b/tools/fim_completion_check.py new file mode 100755 index 000000000..6623e4f1c --- /dev/null +++ b/tools/fim_completion_check.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +import os +import argparse + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Checks final FIM outputs to 
identify missing HUCs') + parser.add_argument('-i','--huc-list-dir', help='list of HUCs to run', required=True) + parser.add_argument('-o','--output-folder', help='directory of HUCs completed', required=True) + + args = vars(parser.parse_args()) + + huc_list_dir = args['huc_list_dir'] + output_folder = args['output_folder'] + + if not os.path.isfile(huc_list_dir): + huc_list = huc_list_dir.split() + else: + + with open(huc_list_dir) as f: + huc_list = f.read().splitlines() + + output_huc_list = os.listdir(output_folder) + + if 'logs' in output_huc_list: + output_huc_list.remove('logs') + + if 'aggregate_fim_outputs' in output_huc_list: + output_huc_list.remove('aggregate_fim_outputs') + + missing_hucs = list(set(huc_list) - set(output_huc_list)) + + if len(missing_hucs) > 0: + print (f"MISSING {len(missing_hucs)} HUCS from outputs: {missing_hucs}") diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py new file mode 100755 index 000000000..bcd6fc14d --- /dev/null +++ b/tools/generate_categorical_fim.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 + +import os +import subprocess +import argparse +import time +from pathlib import Path +import geopandas as gpd +import pandas as pd + + +def update_mapping_status(output_mapping_dir, output_flows_dir): + ''' + Updates the status for nws_lids from the flows subdirectory. Status + is updated for sites where the inundation.py routine was not able to + produce inundation for the supplied flow files. It is assumed that if + an error occured in inundation.py that all flow files for a given site + experienced the error as they all would have the same nwm segments. + + Parameters + ---------- + output_mapping_dir : STR + Path to the output directory of all inundation maps. + output_flows_dir : STR + Path to the directory containing all flows. + + Returns + ------- + None. 
+ + ''' + # Find all LIDs with empty mapping output folders + subdirs = [str(i) for i in Path(output_mapping_dir).rglob('**/*') if i.is_dir()] + empty_nws_lids = [Path(directory).name for directory in subdirs if not list(Path(directory).iterdir())] + + # Write list of empty nws_lids to DataFrame, these are sites that failed in inundation.py + mapping_df = pd.DataFrame({'nws_lid':empty_nws_lids}) + mapping_df['did_it_map'] = 'no' + mapping_df['map_status'] = ' and all categories failed to map' + + # Import shapefile output from flows creation + shapefile = Path(output_flows_dir)/'nws_lid_flows_sites.shp' + flows_df = gpd.read_file(shapefile) + + # Join failed sites to flows df + flows_df = flows_df.merge(mapping_df, how = 'left', on = 'nws_lid') + + # Switch mapped column to no for failed sites and update status + flows_df.loc[flows_df['did_it_map'] == 'no', 'mapped'] = 'no' + flows_df.loc[flows_df['did_it_map']=='no','status'] = flows_df['status'] + flows_df['map_status'] + + # Perform pass for HUCs where mapping was skipped due to missing data #TODO check with Brian + flows_hucs = [i.stem for i in Path(output_flows_dir).iterdir() if i.is_dir()] + mapping_hucs = [i.stem for i in Path(output_mapping_dir).iterdir() if i.is_dir()] + missing_mapping_hucs = list(set(flows_hucs) - set(mapping_hucs)) + + # Update status for nws_lid in missing hucs and change mapped attribute to 'no' + flows_df.loc[flows_df.eval('HUC8 in @missing_mapping_hucs & mapped == "yes"'), 'status'] = flows_df['status'] + ' and all categories failed to map because missing HUC information' + flows_df.loc[flows_df.eval('HUC8 in @missing_mapping_hucs & mapped == "yes"'), 'mapped'] = 'no' + + # Clean up GeoDataFrame and rename columns for consistency + flows_df = flows_df.drop(columns = ['did_it_map','map_status']) + flows_df = flows_df.rename(columns = {'nws_lid':'ahps_lid'}) + + # Write out to file + nws_lid_path = Path(output_mapping_dir) / 'nws_lid_sites.shp' + flows_df.to_file(nws_lid_path) + + +if __name__ == '__main__': + + # Parse arguments + parser = argparse.ArgumentParser(description = 'Run Categorical FIM') + parser.add_argument('-f','--fim_version',help='Name of directory containing outputs of fim_run.sh',required=True) + parser.add_argument('-j','--number_of_jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) + args = vars(parser.parse_args()) + + # Get arguments + fim_version = args['fim_version'] + number_of_jobs = args['number_of_jobs'] + + # Define default arguments. 
Modify these if necessary + fim_run_dir = Path(f'{fim_version}') + fim_version_folder = os.path.basename(fim_version) + output_flows_dir = Path(f'/data/catfim/{fim_version_folder}/flows') + output_mapping_dir = Path(f'/data/catfim/{fim_version_folder}/mapping') + nwm_us_search = '5' + nwm_ds_search = '5' + write_depth_tiff = False + + ## Run CatFIM scripts in sequence + # Generate CatFIM flow files + print('Creating flow files') + start = time.time() + subprocess.call(['python3','/foss_fim/tools/generate_categorical_fim_flows.py', '-w' , str(output_flows_dir), '-u', nwm_us_search, '-d', nwm_ds_search]) + end = time.time() + elapsed_time = (end-start)/60 + print(f'Finished creating flow files in {elapsed_time} minutes') + + # Generate CatFIM mapping + print('Begin mapping') + start = time.time() + subprocess.call(['python3','/foss_fim/tools/generate_categorical_fim_mapping.py', '-r' , str(fim_run_dir), '-s', str(output_flows_dir), '-o', str(output_mapping_dir), '-j', str(number_of_jobs)]) + end = time.time() + elapsed_time = (end-start)/60 + print(f'Finished mapping in {elapsed_time} minutes') + + # Updating mapping status + print('Updating mapping status') + update_mapping_status(str(output_mapping_dir), str(output_flows_dir)) diff --git a/tools/generate_categorical_fim_flows.py b/tools/generate_categorical_fim_flows.py new file mode 100755 index 000000000..d2f5f0501 --- /dev/null +++ b/tools/generate_categorical_fim_flows.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python3 +from pathlib import Path +import pandas as pd +import time +from tools_shared_functions import aggregate_wbd_hucs, mainstem_nwm_segs, get_thresholds, flow_data, get_metadata, get_nwm_segs +import argparse +from dotenv import load_dotenv +import os +import sys +sys.path.append('/foss_fim/src') +from utils.shared_variables import VIZ_PROJECTION + +EVALUATED_SITES_CSV = r'/data/inputs/ahps_sites/evaluated_ahps_sites.csv' + + +def get_env_paths(): + load_dotenv() + #import variables from .env file + API_BASE_URL = os.getenv("API_BASE_URL") + WBD_LAYER = os.getenv("WBD_LAYER") + return API_BASE_URL, WBD_LAYER + + +def generate_catfim_flows(workspace, nwm_us_search, nwm_ds_search): + ''' + This will create static flow files for all nws_lids and save to the + workspace directory with the following format: + huc code + nws_lid_code + threshold (action/minor/moderate/major if they exist/are defined by WRDS) + flow file (ahps_{lid code}_huc_{huc 8 code}_flows_{threshold}.csv) + + This will use the WRDS API to get the nwm segments as well as the flow + values for each threshold at each nws_lid and then create the necessary + flow file to use for inundation mapping. + + Parameters + ---------- + workspace : STR + Location where output flow files will exist. + nwm_us_search : STR + Upstream distance (in miles) for walking up NWM network. + nwm_ds_search : STR + Downstream distance (in miles) for walking down NWM network. + wbd_path : STR + Location of HUC geospatial data (geopackage). + + Returns + ------- + None. + + ''' + + all_start = time.time() + #Define workspace and wbd_path as a pathlib Path. Convert search distances to integer. 
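# Aside (illustrative, not part of the patch): an example invocation of this script. The flags
# match the argparse interface at the bottom of this file and the subprocess call in
# generate_categorical_fim.py; the workspace path is a hypothetical example.
#
#   python3 /foss_fim/tools/generate_categorical_fim_flows.py -w /data/catfim/fim_3_x/flows -u 5 -d 5
#
# Output layout (per the docstring above):
#   <workspace>/<huc8>/<lid>/<category>/ahps_<lid>_huc_<huc8>_flows_<category>.csv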
+ workspace = Path(workspace) + nwm_us_search = int(nwm_us_search) + nwm_ds_search = int(nwm_ds_search) + metadata_url = f'{API_BASE_URL}/metadata' + threshold_url = f'{API_BASE_URL}/nws_threshold' + ################################################################### + + #Create workspace + workspace.mkdir(parents=True,exist_ok = True) + + print('Retrieving metadata...') + #Get metadata for 'CONUS' + conus_list, conus_dataframe = get_metadata(metadata_url, select_by = 'nws_lid', selector = ['all'], must_include = 'nws_data.rfc_forecast_point', upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search ) + + #Get metadata for Islands + islands_list, islands_dataframe = get_metadata(metadata_url, select_by = 'state', selector = ['HI','PR'] , must_include = None, upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search) + + #Append the dataframes and lists + all_lists = conus_list + islands_list + + print('Determining HUC using WBD layer...') + #Assign HUCs to all sites using a spatial join of the FIM 3 HUC layer. + #Get a dictionary of hucs (key) and sites (values) as well as a GeoDataFrame + #of all sites used later in script. + huc_dictionary, out_gdf = aggregate_wbd_hucs(metadata_list = all_lists, wbd_huc8_path = WBD_LAYER) + + #Get all possible mainstem segments + print('Getting list of mainstem segments') + #Import list of evaluated sites + print(EVALUATED_SITES_CSV) + print(os.path.exists(EVALUATED_SITES_CSV)) + list_of_sites = pd.read_csv(EVALUATED_SITES_CSV)['Total_List'].to_list() + #The entire routine to get mainstems is hardcoded in this function. + ms_segs = mainstem_nwm_segs(metadata_url, list_of_sites) + + #Loop through each huc unit, first define message variable and flood categories. + all_messages = [] + flood_categories = ['action', 'minor', 'moderate', 'major', 'record'] + for huc in huc_dictionary: + print(f'Iterating through {huc}') + #Get list of nws_lids + nws_lids = huc_dictionary[huc] + #Loop through each lid in list to create flow file + for lid in nws_lids: + #Convert lid to lower case + lid = lid.lower() + #Get stages and flows for each threshold from the WRDS API. Priority given to USGS calculated flows. + stages, flows = get_thresholds(threshold_url = threshold_url, select_by = 'nws_lid', selector = lid, threshold = 'all') + #Check if stages are supplied, if not write message and exit. + if all(stages.get(category, None)==None for category in flood_categories): + message = f'{lid}:missing threshold stages' + all_messages.append(message) + continue + #Check if calculated flows are supplied, if not write message and exit. + if all(flows.get(category, None) == None for category in flood_categories): + message = f'{lid}:missing calculated flows' + all_messages.append(message) + continue + + #find lid metadata from master list of metadata dictionaries (line 66). + metadata = next((item for item in all_lists if item['identifiers']['nws_lid'] == lid.upper()), False) + + #Get mainstem segments of LID by intersecting LID segments with known mainstem segments. + segments = get_nwm_segs(metadata) + site_ms_segs = set(segments).intersection(ms_segs) + segments = list(site_ms_segs) + #if no segments, write message and exit out + if not segments: + print(f'{lid} no segments') + message = f'{lid}:missing nwm segments' + all_messages.append(message) + continue + #For each flood category + for category in flood_categories: + #Get the flow + flow = flows[category] + #If there is a valid flow value, write a flow file. 
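# Hedged aside: the per-category flow file written below gets its layout from
# tools_shared_functions.flow_data(segments, flow); the exact schema is defined there, but
# conceptually it is one threshold discharge repeated for every mainstem NWM segment,
# e.g. (column names assumed, values hypothetical):
#
#   feature_id,discharge
#   948010001,123.45
#   948010002,123.45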
+ if flow: + #round flow to nearest hundredth + flow = round(flow,2) + #Create the guts of the flow file. + flow_info = flow_data(segments,flow) + #Define destination path and create folders + output_file = workspace / huc / lid / category / (f'ahps_{lid}_huc_{huc}_flows_{category}.csv') + output_file.parent.mkdir(parents = True, exist_ok = True) + #Write flow file to file + flow_info.to_csv(output_file, index = False) + else: + message = f'{lid}:{category} is missing calculated flow' + all_messages.append(message) + + #Get various attributes of the site. + lat = float(metadata['usgs_preferred']['latitude']) + lon = float(metadata['usgs_preferred']['longitude']) + wfo = metadata['nws_data']['wfo'] + rfc = metadata['nws_data']['rfc'] + state = metadata['nws_data']['state'] + county = metadata['nws_data']['county'] + name = metadata['nws_data']['name'] + flow_units = flows['units'] + flow_source = flows['source'] + stage_units = stages['units'] + stage_source = stages['source'] + wrds_timestamp = stages['wrds_timestamp'] + nrldb_timestamp = metadata['nrldb_timestamp'] + nwis_timestamp = metadata['nwis_timestamp'] + + #Create a csv with same information as shapefile but with each threshold as new record. + csv_df = pd.DataFrame() + for threshold in flood_categories: + line_df = pd.DataFrame({'nws_lid': [lid], 'name':name, 'WFO': wfo, 'rfc':rfc, 'huc':[huc], 'state':state, 'county':county, 'magnitude': threshold, 'q':flows[threshold], 'q_uni':flows['units'], 'q_src':flow_source, 'stage':stages[threshold], 'stage_uni':stages['units'], 's_src':stage_source, 'wrds_time':wrds_timestamp, 'nrldb_time':nrldb_timestamp,'nwis_time':nwis_timestamp, 'lat':[lat], 'lon':[lon]}) + csv_df = csv_df.append(line_df) + #Round flow and stage columns to 2 decimal places. + csv_df = csv_df.round({'q':2,'stage':2}) + + #If a site folder exists (ie a flow file was written) save files containing site attributes. + output_dir = workspace / huc / lid + if output_dir.exists(): + #Export DataFrame to csv containing attributes + csv_df.to_csv(output_dir / f'{lid}_attributes.csv', index = False) + else: + message = f'{lid}:missing all calculated flows' + all_messages.append(message) + + print('wrapping up...') + #Recursively find all *_attributes csv files and append + csv_files = list(workspace.rglob('*_attributes.csv')) + all_csv_df = pd.DataFrame() + for csv in csv_files: + #Huc has to be read in as string to preserve leading zeros. + temp_df = pd.read_csv(csv, dtype={'huc':str}) + all_csv_df = all_csv_df.append(temp_df, ignore_index = True) + #Write to file + all_csv_df.to_csv(workspace / 'nws_lid_attributes.csv', index = False) + + #This section populates a shapefile of all potential sites and details + #whether it was mapped or not (mapped field) and if not, why (status field). + + #Preprocess the out_gdf GeoDataFrame. Reproject and reformat fields. + viz_out_gdf = out_gdf.to_crs(VIZ_PROJECTION) + viz_out_gdf.rename(columns = {'identifiers_nwm_feature_id': 'nwm_seg', 'identifiers_nws_lid':'nws_lid', 'identifiers_usgs_site_code':'usgs_gage'}, inplace = True) + viz_out_gdf['nws_lid'] = viz_out_gdf['nws_lid'].str.lower() + + #Using list of csv_files, populate DataFrame of all nws_lids that had + #a flow file produced and denote with "mapped" column. + nws_lids = [file.stem.split('_attributes')[0] for file in csv_files] + lids_df = pd.DataFrame(nws_lids, columns = ['nws_lid']) + lids_df['mapped'] = 'yes' + + #Identify what lids were mapped by merging with lids_df. 
Populate + #'mapped' column with 'No' if sites did not map. + viz_out_gdf = viz_out_gdf.merge(lids_df, how = 'left', on = 'nws_lid') + viz_out_gdf['mapped'] = viz_out_gdf['mapped'].fillna('no') + + #Write messages to DataFrame, split into columns, aggregate messages. + messages_df = pd.DataFrame(all_messages, columns = ['message']) + messages_df = messages_df['message'].str.split(':', n = 1, expand = True).rename(columns={0:'nws_lid', 1:'status'}) + status_df = messages_df.groupby(['nws_lid'])['status'].apply(', '.join).reset_index() + + #Join messages to populate status field to candidate sites. Assign + #status for null fields. + viz_out_gdf = viz_out_gdf.merge(status_df, how = 'left', on = 'nws_lid') + viz_out_gdf['status'] = viz_out_gdf['status'].fillna('all calculated flows available') + + #Filter out columns and write out to file + viz_out_gdf = viz_out_gdf.filter(['nws_lid','usgs_gage','nwm_seg','HUC8','mapped','status','geometry']) + viz_out_gdf.to_file(workspace /'nws_lid_flows_sites.shp') + + #time operation + all_end = time.time() + print(f'total time is {round((all_end - all_start)/60),1} minutes') + + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Create forecast files for all nws_lid sites') + parser.add_argument('-w', '--workspace', help = 'Workspace where all data will be stored.', required = True) + parser.add_argument('-u', '--nwm_us_search', help = 'Walk upstream on NWM network this many miles', required = True) + parser.add_argument('-d', '--nwm_ds_search', help = 'Walk downstream on NWM network this many miles', required = True) + args = vars(parser.parse_args()) + + #Run get_env_paths and static_flow_lids + API_BASE_URL, WBD_LAYER = get_env_paths() + generate_catfim_flows(**args) diff --git a/tools/generate_categorical_fim_mapping.py b/tools/generate_categorical_fim_mapping.py new file mode 100755 index 000000000..0827d3f08 --- /dev/null +++ b/tools/generate_categorical_fim_mapping.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python3 + +import sys +import os +from multiprocessing import Pool +import argparse +import traceback +import rasterio +import geopandas as gpd +import pandas as pd +import shutil +from rasterio.features import shapes +from shapely.geometry.polygon import Polygon +from shapely.geometry.multipolygon import MultiPolygon +from inundation import inundate +sys.path.append('/foss_fim/src') +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION +from utils.shared_functions import getDriver + +INPUTS_DIR = r'/data/inputs' +magnitude_list = ['action', 'minor', 'moderate','major', 'record'] + +# Define necessary variables for inundation() +hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' +mask_type, catchment_poly = 'huc', '' + + +def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif, log_file): + + no_data_list = [] + procs_list = [] + + source_flow_dir_list = os.listdir(source_flow_dir) + output_flow_dir_list = os.listdir(fim_run_dir) + + # Log missing hucs + missing_hucs = list(set(source_flow_dir_list) - set(output_flow_dir_list)) + missing_hucs = [huc for huc in missing_hucs if "." 
not in huc] + if len(missing_hucs) > 0: + f = open(log_file, 'a+') + f.write(f"Missing hucs from output directory: {', '.join(missing_hucs)}\n") + f.close() + + # Loop through matching huc directories in the source_flow directory + matching_hucs = list(set(output_flow_dir_list) & set(source_flow_dir_list)) + for huc in matching_hucs: + + if "." not in huc: + + # Get list of AHPS site directories + ahps_site_dir = os.path.join(source_flow_dir, huc) + ahps_site_dir_list = os.listdir(ahps_site_dir) + + # Map paths to HAND files needed for inundation() + fim_run_huc_dir = os.path.join(fim_run_dir, huc) + rem = os.path.join(fim_run_huc_dir, 'rem_zeroed_masked.tif') + catchments = os.path.join(fim_run_huc_dir, 'gw_catchments_reaches_filtered_addedAttributes.tif') + hydroTable = os.path.join(fim_run_huc_dir, 'hydroTable.csv') + + exit_flag = False # Default to False. + + # Check if necessary data exist; set exit_flag to True if they don't exist + for f in [rem, catchments, hydroTable]: + if not os.path.exists(f): + no_data_list.append(f) + exit_flag = True + + # Log missing data + if exit_flag: + f = open(log_file, 'a+') + f.write(f"Missing data for: {fim_run_huc_dir}\n") + f.close() + + # Map path to huc directory inside the output_cat_fim_dir + cat_fim_huc_dir = os.path.join(output_cat_fim_dir, huc) + if not os.path.exists(cat_fim_huc_dir): + os.mkdir(cat_fim_huc_dir) + + # Loop through AHPS sites + for ahps_site in ahps_site_dir_list: + # Map parent directory for AHPS source data dir and list AHPS thresholds (act, min, mod, maj) + ahps_site_parent = os.path.join(ahps_site_dir, ahps_site) + thresholds_dir_list = os.listdir(ahps_site_parent) + + # Map parent directory for all inundation output files. + cat_fim_huc_ahps_dir = os.path.join(cat_fim_huc_dir, ahps_site) + if not os.path.exists(cat_fim_huc_ahps_dir): + os.mkdir(cat_fim_huc_ahps_dir) + + # Loop through thresholds/magnitudes and define inundation output file paths + for magnitude in thresholds_dir_list: + + if "." not in magnitude: + + magnitude_flows_csv = os.path.join(ahps_site_parent, magnitude, 'ahps_' + ahps_site + '_huc_' + huc + '_flows_' + magnitude + '.csv') + + if os.path.exists(magnitude_flows_csv): + + output_extent_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.tif') + + if depthtif: + output_depth_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_depth.tif') + else: + output_depth_grid = None + + # Append necessary variables to list for multiprocessing.
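# Aside (illustrative, not part of the patch): Pool.map hands each element of procs_list to
# run_inundation as a single positional argument, which is why run_inundation unpacks
# args[0]..args[9] below. The same pattern in miniature, with hypothetical file names:
from multiprocessing import Pool

def _demo_worker(args):
    rem, catchments = args                      # positional unpack, as run_inundation does
    return f'would inundate using {rem} and {catchments}'

if __name__ == '__main__':
    demo_jobs = [['rem_a.tif', 'cat_a.tif'], ['rem_b.tif', 'cat_b.tif']]
    with Pool(processes=2) as pool:
        print(pool.map(_demo_worker, demo_jobs))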
+ procs_list.append([rem, catchments, magnitude_flows_csv, huc, hydroTable, output_extent_grid, output_depth_grid, ahps_site, magnitude, log_file]) + + # Initiate multiprocessing + print(f"Running inundation for {len(procs_list)} sites using {number_of_jobs} jobs") + with Pool(processes=number_of_jobs) as pool: + pool.map(run_inundation, procs_list) + + +def run_inundation(args): + + rem = args[0] + catchments = args[1] + magnitude_flows_csv = args[2] + huc = args[3] + hydroTable = args[4] + output_extent_grid = args[5] + output_depth_grid = args[6] + ahps_site = args[7] + magnitude = args[8] + log_file = args[9] + + try: + inundate(rem,catchments,catchment_poly,hydroTable,magnitude_flows_csv,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=output_extent_grid,inundation_polygon=None, + depths=output_depth_grid,out_raster_profile=None,out_vector_profile=None,quiet=True + ) + + except: + # Log errors and their tracebacks + f = open(log_file, 'a+') + f.write(f"{output_extent_grid} - inundation error: {traceback.format_exc()}\n") + f.close() + + #Inundation.py appends the huc code to the supplied output_extent_grid. + #Modify output_extent_grid to match inundation.py saved filename. + #Search for this file, if it didn't create, send message to log file. + base_file_path,extension = os.path.splitext(output_extent_grid) + saved_extent_grid_filename = "{}_{}{}".format(base_file_path,huc,extension) + if not os.path.exists(saved_extent_grid_filename): + with open(log_file, 'a+') as f: + f.write('FAILURE_huc_{}:{}:{} map failed to create\n'.format(huc,ahps_site,magnitude)) + + +def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, nws_lid_attributes_filename, log_file): + + # Create workspace + gpkg_dir = os.path.join(output_cat_fim_dir, 'gpkg') + if not os.path.exists(gpkg_dir): + os.mkdir(gpkg_dir) + + # Find the FIM version + fim_version = os.path.basename(output_cat_fim_dir) + merged_layer = os.path.join(output_cat_fim_dir, 'catfim_library.shp') + + if not os.path.exists(merged_layer): # prevents appending to existing output + + huc_ahps_dir_list = os.listdir(output_cat_fim_dir) + skip_list=['errors','logs','gpkg',merged_layer] + + for magnitude in magnitude_list: + + procs_list = [] + + # Loop through all categories + for huc in huc_ahps_dir_list: + + if huc not in skip_list: + + huc_dir = os.path.join(output_cat_fim_dir, huc) + ahps_dir_list = os.listdir(huc_dir) + + # Loop through ahps sites + for ahps_lid in ahps_dir_list: + ahps_lid_dir = os.path.join(huc_dir, ahps_lid) + + extent_grid = os.path.join(ahps_lid_dir, ahps_lid + '_' + magnitude + '_extent_' + huc + '.tif') + + if os.path.exists(extent_grid): + procs_list.append([ahps_lid, extent_grid, gpkg_dir, fim_version, huc, magnitude, nws_lid_attributes_filename]) + + else: + try: + f = open(log_file, 'a+') + f.write(f"Missing layers: {extent_gpkg}\n") + f.close() + except: + pass + + # Multiprocess with instructions + with Pool(processes=number_of_jobs) as pool: + pool.map(reformat_inundation_maps, procs_list) + + # Merge all layers + print(f"Merging {len(os.listdir(gpkg_dir))} layers...") + + for layer in os.listdir(gpkg_dir): + + diss_extent_filename = os.path.join(gpkg_dir, layer) + + # Open diss_extent + diss_extent = gpd.read_file(diss_extent_filename) + diss_extent['viz'] = 'yes' + + # Write/append aggregate diss_extent + if os.path.isfile(merged_layer): + diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False, mode='a') + else: + 
diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False) + + del diss_extent + + shutil.rmtree(gpkg_dir) + + else: + print(f"{merged_layer} already exists.") + + +def reformat_inundation_maps(args): + + try: + lid = args[0] + grid_path = args[1] + gpkg_dir = args[2] + fim_version = args[3] + huc = args[4] + magnitude = args[5] + nws_lid_attributes_filename = args[6] + + # Convert raster to to shapes + with rasterio.open(grid_path) as src: + image = src.read(1) + mask = image > 0 + + # Aggregate shapes + results = ({'properties': {'extent': 1}, 'geometry': s} for i, (s, v) in enumerate(shapes(image, mask=mask,transform=src.transform))) + + # Convert list of shapes to polygon + extent_poly = gpd.GeoDataFrame.from_features(list(results), crs=PREP_PROJECTION) + + # Dissolve polygons + extent_poly_diss = extent_poly.dissolve(by='extent') + + # Update attributes + extent_poly_diss = extent_poly_diss.reset_index(drop=True) + extent_poly_diss['ahps_lid'] = lid + extent_poly_diss['magnitude'] = magnitude + extent_poly_diss['version'] = fim_version + extent_poly_diss['huc'] = huc + + # Project to Web Mercator + extent_poly_diss = extent_poly_diss.to_crs(VIZ_PROJECTION) + + # Join attributes + nws_lid_attributes_table = pd.read_csv(nws_lid_attributes_filename, dtype={'huc':str}) + nws_lid_attributes_table = nws_lid_attributes_table.loc[(nws_lid_attributes_table.magnitude==magnitude) & (nws_lid_attributes_table.nws_lid==lid)] + + + extent_poly_diss = extent_poly_diss.merge(nws_lid_attributes_table, left_on=['ahps_lid','magnitude','huc'], right_on=['nws_lid','magnitude','huc']) + + extent_poly_diss = extent_poly_diss.drop(columns='nws_lid') + + # Save dissolved multipolygon + handle = os.path.split(grid_path)[1].replace('.tif', '') + + diss_extent_filename = os.path.join(gpkg_dir, handle + "_dissolved.gpkg") + + extent_poly_diss["geometry"] = [MultiPolygon([feature]) if type(feature) == Polygon else feature for feature in extent_poly_diss["geometry"]] + + if not extent_poly_diss.empty: + + extent_poly_diss.to_file(diss_extent_filename,driver=getDriver(diss_extent_filename),index=False) + + except Exception as e: + # Log and clean out the gdb so it's not merged in later + try: + f = open(log_file, 'a+') + f.write(str(diss_extent_filename) + " - dissolve error: " + str(e)) + f.close() + except: + pass + + +if __name__ == '__main__': + + # Parse arguments + parser = argparse.ArgumentParser(description='Categorical inundation mapping for FOSS FIM.') + parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) + parser.add_argument('-s', '--source-flow-dir',help='Path to directory containing flow CSVs to use to generate categorical FIM.',required=True, default="") + parser.add_argument('-o', '--output-cat-fim-dir',help='Path to directory where categorical FIM outputs will be written.',required=True, default="") + parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. 
Default is 1.',required=False, default="1",type=int) + parser.add_argument('-depthtif','--write-depth-tiff',help='Using this option will write depth TIFFs.',required=False, action='store_true') + + args = vars(parser.parse_args()) + + fim_run_dir = args['fim_run_dir'] + source_flow_dir = args['source_flow_dir'] + output_cat_fim_dir = args['output_cat_fim_dir'] + number_of_jobs = int(args['number_of_jobs']) + depthtif = args['write_depth_tiff'] + + # Create output directory + if not os.path.exists(output_cat_fim_dir): + os.mkdir(output_cat_fim_dir) + + # Create log directory + log_dir = os.path.join(output_cat_fim_dir, 'logs') + if not os.path.exists(log_dir): + os.mkdir(log_dir) + + # Create error log path + log_file = os.path.join(log_dir, 'errors.log') + + # Map path to points with attributes + nws_lid_attributes_filename = os.path.join(source_flow_dir, 'nws_lid_attributes.csv') + + print("Generating Categorical FIM") + generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif,log_file) + + print("Aggregating Categorical FIM") + post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir,nws_lid_attributes_filename,log_file) diff --git a/tools/generate_nws_lid.py b/tools/generate_nws_lid.py new file mode 100644 index 000000000..005befebe --- /dev/null +++ b/tools/generate_nws_lid.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 + +from pathlib import Path +import pandas as pd +import geopandas as gpd +from collections import defaultdict +from tools_shared_functions import aggregate_wbd_hucs, get_metadata +import argparse +from dotenv import load_dotenv +import os +import sys +sys.path.append('/foss_fim/src') +from utils.shared_variables import PREP_PROJECTION + +load_dotenv() +#import variables from .env file +API_BASE_URL = os.getenv("API_BASE_URL") +EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") +WBD_LAYER = os.getenv("WBD_LAYER") +#Define path to NWM stream layer +NWM_FILE='/data/inputs/nwm_hydrofabric/nwm_flows.gpkg' + + +def generate_nws_lid(workspace): + ''' + Generate the nws_lid layer containing all nws_lid points attributed whether site is mainstems and co-located + + Parameters + ---------- + workspace : STR + Directory where outputs will be saved. + + Returns + ------- + None. + + ''' + + ############################################################################## + #Get all nws_lid points + print('Retrieving metadata ..') + + metadata_url = f'{API_BASE_URL}/metadata/' + #Trace downstream from all rfc_forecast_point. 
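# Aside: API_BASE_URL used above, plus EVALUATED_SITES_CSV and WBD_LAYER read via os.getenv()
# at the top of this script, are supplied through a .env file loaded by load_dotenv().
# A hypothetical example (values illustrative only):
#
#   API_BASE_URL=https://example-wrds-host/api
#   EVALUATED_SITES_CSV=/data/inputs/ahps_sites/evaluated_ahps_sites.csv
#   WBD_LAYER=/data/inputs/wbd/WBD_National.gpkg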
+ select_by = 'nws_lid' + selector = ['all'] + must_include = 'nws_data.rfc_forecast_point' + downstream_trace_distance = 'all' + fcst_list, fcst_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Get list of all evaluated sites not in fcst_list + fcst_list_sites = [record.get('identifiers').get('nws_lid').lower() for record in fcst_list] + evaluated_sites = pd.read_csv(EVALUATED_SITES_CSV)['Total_List'].str.lower().to_list() + evaluated_sites= list(set(evaluated_sites) - set(fcst_list_sites)) + + #Trace downstream from all evaluated sites not in fcst_list + select_by = 'nws_lid' + selector = evaluated_sites + must_include = None + downstream_trace_distance = 'all' + eval_list, eval_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Trace downstream from all sites in HI/PR. + select_by = 'state' + selector = ['HI','PR'] + must_include = None + downstream_trace_distance = 'all' + islands_list, islands_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Append all lists + all_lists = fcst_list + eval_list + islands_list + + ############################################################################### + #Compile NWM segments from all_lists + + #Get dictionary of downstream segment (key) and target segments (values) + #Get dictionary of target segment (key) and site code (value) + downstream = defaultdict(list) + target = defaultdict(list) + #For each lid metadata dictionary in list + for lid in all_lists: + site = lid.get('identifiers').get('nws_lid') + #Get the nwm feature id associated with the location + location_nwm_seg = lid.get('identifiers').get('nwm_feature_id') + #get all downstream segments + downstream_nwm_segs = lid.get('downstream_nwm_features') + #If valid location_nwm_segs construct two dictionaries. + if location_nwm_seg: + #Dictionary with target segment and site + target[int(location_nwm_seg)].append(site) + #Dictionary of key (2nd to last element) and value (target segment) + #2nd to last element used because last element is always 0 (ocean) and the 2nd to last allows for us to get the river 'tree' (Mississippi, Colorado, etc) + value = location_nwm_seg + if not downstream_nwm_segs: + #Special case, no downstream nwm segments are returned (PR/VI/HI). + key = location_nwm_seg + elif len(downstream_nwm_segs) == 1: + #Special case, the nws_lid is within 1 segment of the ocean (0) + key = location_nwm_seg + elif len(downstream_nwm_segs)>1: + #Otherwise, 2nd to last element used to identify proper river system. + key = downstream_nwm_segs[-2] + #Dictionary with key of 2nd to last downstream segment and value of site nwm segment + downstream[int(key)].append(int(value)) + ############################################################################### + #Walk downstream the network and identify headwater points + print('Traversing network..') + + #Import NWM file and create dictionary of network and create the NWM network dictionary. 
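# Aside (toy values, not part of the patch): the downstream walk performed below flags any target
# segment that is reachable by walking downstream from another target as 'not_headwater'.
toy_network = {1: [2], 2: [3], 3: [0], 5: [3]}        # 'to'-node lookup; 0 = outlet/ocean
toy_targets = [1, 3, 5]

toy_status = {t: 'is_headwater' for t in toy_targets}
for t in toy_targets:
    [down] = toy_network[t]
    while down > 0:
        if down in toy_targets:
            toy_status[down] = 'not_headwater'
        [down] = toy_network[down]
# toy_status -> {1: 'is_headwater', 3: 'not_headwater', 5: 'is_headwater'}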
+ nwm_gdf = gpd.read_file(NWM_FILE) + network = nwm_gdf.groupby('ID')['to'].apply(list).to_dict() + + #Walk through network and find headwater points + all_dicts = {} + for tree, targets in downstream.items(): + #All targets are assigned headwaters + sub_dict = {i:'is_headwater' for i in targets} + #Walk downstream of each target + for i in targets: + #Check to see element is not a headwater + if sub_dict[i] == 'not_headwater': + continue + #Get from_node and to_node. + from_node = i + [to_node] = network[from_node] + #Walk downstream from target + while to_node>0: + #Check if to_node is in targets list + if to_node in targets: + sub_dict[to_node] = 'not_headwater' + #Assign downstream ID as to_node + [to_node] = network[to_node] + + #Append status to master dictionary + all_dicts.update(sub_dict) + + #Create dictionaries of nws_lid (key) and headwater status (value) and nws_lid (key) and co-located with same feature_id(value) + final_dict = {} + duplicate_dict = {} + for key,status in all_dicts.items(): + site_list = target[key] + for site in site_list: + final_dict[site] = status + if len(site_list) > 1: + duplicate_dict[site] = 'is_colocated' + else: + duplicate_dict[site] = 'not_colocated' + + ############################################################################## + #Get Spatial data and populate headwater/duplicate attributes + print('Attributing nws_lid layer..') + + #Geodataframe from all_lists, reproject, and reset index. + trash, nws_lid_gdf = aggregate_wbd_hucs(all_lists, WBD_LAYER, retain_attributes = False) + nws_lid_gdf.columns = [name.replace('identifiers_','') for name in nws_lid_gdf.columns] + nws_lid_gdf.to_crs(PREP_PROJECTION, inplace = True) + nws_lid_gdf.reset_index(drop = True) + + #Create DataFrames of headwater and duplicates and join. 
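# Aside (hypothetical lids/feature_ids): how the 'target' dictionary built earlier drives the
# co-location flag assigned above; any NWM feature_id hosting more than one nws_lid marks all
# of its sites as colocated.
target_example = {1234567: ['abcd1'], 7654321: ['efgh2', 'ijkl3']}
colocated_example = {site: ('is_colocated' if len(sites) > 1 else 'not_colocated')
                     for sites in target_example.values() for site in sites}
# -> {'abcd1': 'not_colocated', 'efgh2': 'is_colocated', 'ijkl3': 'is_colocated'}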
+ final_dict_pd = pd.DataFrame(list(final_dict.items()), columns = ['nws_lid','is_headwater']) + duplicate_dict_pd = pd.DataFrame(list(duplicate_dict.items()),columns = ['nws_lid','is_colocated']) + attributes = final_dict_pd.merge(duplicate_dict_pd, on = 'nws_lid') + attributes.replace({'is_headwater': True,'is_colocated': True,'not_headwater': False,'not_colocated':False}, inplace = True) + + #Join attributes, remove sites with no assigned nwm_feature_id and write to file + joined = nws_lid_gdf.merge(attributes, on='nws_lid', how = 'left') + joined.dropna(subset =['nwm_feature_id'], inplace = True) + Path(workspace).mkdir(parents = True, exist_ok = True) + joined.to_file(Path(workspace) / 'nws_lid.gpkg', driver = 'GPKG') + + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Create spatial data of nws_lid points attributed with mainstems and colocated.') + parser.add_argument('-w', '--workspace', help = 'Workspace where all data will be stored.', required = True) + args = vars(parser.parse_args()) + + #Run get_env_paths and static_flow_lids + generate_nws_lid(**args) diff --git a/tools/gms_tools/inundate_gms.py b/tools/gms_tools/inundate_gms.py new file mode 100644 index 000000000..0148da336 --- /dev/null +++ b/tools/gms_tools/inundate_gms.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 + +import numpy as np +import pandas as pd +from inundation import inundate +import os +from tqdm import tqdm +import argparse +from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor,as_completed +from inundation import hydroTableHasOnlyLakes, NoForecastFound +import traceback +import logging + + +def Inundate_gms( + hydrofabric_dir, forecast, num_workers=1, + hucs=None, + inundation_raster=None, + inundation_polygon=None, depths_raster=None, + verbose=False, + log_file=None, + output_fileNames=None + ): + + # input handling + if hucs is not None: + try: + _ = (i for i in hucs) + except TypeError: + raise ValueError("hucs argument must be an iterable") + + if isinstance(hucs,str): + hucs = [hucs] + + num_workers = int(num_workers) + + # log file + if log_file is not None: + if os.path.exists(log_file): + os.remove(log_file) + + print('HUC8,BranchID,Exception',file=open(log_file,'w')) + #if log_file: + #logging.basicConfig(filename=log_file, level=logging.INFO) + #logging.info('HUC8,BranchID,Exception') + + # load gms inputs + hucs_branches = pd.read_csv( os.path.join(hydrofabric_dir,'gms_inputs.csv'), + header=None, + dtype= {0:str,1:str} + ) + + if hucs is not None: + hucs = set(hucs) + huc_indices = hucs_branches.loc[:,0].isin(hucs) + hucs_branches = hucs_branches.loc[huc_indices,:] + + # get number of branches + number_of_branches = len(hucs_branches) + + # make inundate generator + inundate_input_generator = __inundate_gms_generator( + hucs_branches,number_of_branches, + hydrofabric_dir, + inundation_raster, + inundation_polygon, + depths_raster, + forecast, + verbose=False + ) + + # start up process pool + # better results with Process pool + executor = ProcessPoolExecutor(max_workers=num_workers) + + # collect output filenames + inundation_raster_fileNames = [None] * number_of_branches + inundation_polygon_fileNames = [None] * number_of_branches + depths_raster_fileNames = [None] * number_of_branches + hucCodes = [None] * number_of_branches + branch_ids = [None] * number_of_branches + + + executor_generator = { + executor.submit(inundate,**inp) : ids for inp,ids in inundate_input_generator + } + + idx = 0 + for future in 
tqdm(as_completed(executor_generator), + total=len(executor_generator), + disable=(not verbose), + desc="Inundating branches with {} workers".format(num_workers) + ): + + hucCode, branch_id = executor_generator[future] + + try: + future.result() + + except NoForecastFound as exc: + if log_file is not None: + print(f'{hucCode},{branch_id},{exc.__class__.__name__}, {exc}', + file=open(log_file,'a')) + elif verbose: + print(f'{hucCode},{branch_id},{exc.__class__.__name__}, {exc}') + + except hydroTableHasOnlyLakes as exc: + if log_file is not None: + print(f'{hucCode},{branch_id},{exc.__class__.__name__}, {exc}', + file=open(log_file,'a')) + elif verbose: + print(f'{hucCode},{branch_id},{exc.__class__.__name__}, {exc}') + + except Exception as exc: + if log_file is not None: + print(f'{hucCode},{branch_id},{exc.__class__.__name__}, {exc}', + file=open(log_file,'a')) + else: + print(f'{hucCode},{branch_id},{exc.__class__.__name__}, {exc}') + else: + + hucCodes[idx] = hucCode + branch_ids[idx] = branch_id + + try: + #print(hucCode,branch_id,future.result()[0][0]) + inundation_raster_fileNames[idx] = future.result()[0][0] + except TypeError: + pass + + try: + depths_raster_fileNames[idx] = future.result()[1][0] + except TypeError: + pass + + try: + inundation_polygon_fileNames[idx] = future.result()[2][0] + except TypeError: + pass + + idx += 1 + + # power down pool + executor.shutdown(wait=True) + + # make filename dataframe + output_fileNames_df = pd.DataFrame( { + 'huc8' : hucCodes, + 'branchID' : branch_ids, + 'inundation_rasters' : inundation_raster_fileNames, + 'depths_rasters' : depths_raster_fileNames, + 'inundation_polygons' : inundation_polygon_fileNames } + ) + + if output_fileNames is not None: + output_fileNames_df.to_csv(output_fileNames,index=False) + + return(output_fileNames_df) + + + + +def __inundate_gms_generator( + hucs_branches,number_of_branches, + hydrofabric_dir, + inundation_raster, + inundation_polygon, + depths_raster, + forecast,verbose=False + ): + + # iterate over branches + for idx,row in hucs_branches.iterrows(): + + huc = str(row[0]) + branch_id = str(row[1]) + + gms_dir = os.path.join(hydrofabric_dir,huc,'branches') + + rem_branch = os.path.join( gms_dir,branch_id,'rem_zeroed_masked_{}.tif'.format(branch_id) ) + catchments_branch = os.path.join( gms_dir,branch_id, + f'gw_catchments_reaches_filtered_addedAttributes_{branch_id}.tif' ) + hydroTable_branch = os.path.join( gms_dir,branch_id,'hydroTable_{}.csv'.format(branch_id) ) + catchment_poly = os.path.join( gms_dir, branch_id, + f'gw_catchments_reaches_filtered_addedAttributes_crosswalked_{branch_id}.gpkg' ) + + + # branch output + inundation_branch_raster = __append_id_to_file_name(inundation_raster,[huc,branch_id]) + inundation_branch_polygon = __append_id_to_file_name(inundation_polygon,[huc,branch_id]) + depths_branch_raster = __append_id_to_file_name(depths_raster,[huc,branch_id]) + + # identifiers + identifiers = (huc,branch_id) + + # inundate input + inundate_input = { + 'rem' : rem_branch, 'catchments' : catchments_branch, 'catchment_poly' : catchment_poly, + 'hydro_table' : hydroTable_branch,'forecast' : forecast, + 'mask_type' : None, + 'hucs' : None, + 'hucs_layerName' : None, + 'subset_hucs' : None, 'num_workers' : 1, + 'aggregate' : False, + 'inundation_raster' : inundation_branch_raster, + 'inundation_polygon' : inundation_branch_polygon, + 'depths' : depths_branch_raster, + 'out_raster_profile' : None, + 'out_vector_profile' : None, + 'quiet' : not verbose + } + + yield 
(inundate_input,identifiers) + + + +def __append_id_to_file_name(file_name,identifier): + + + if file_name is not None: + + root,extension = os.path.splitext(file_name) + + if isinstance(identifier,list): + for i in identifier: + out_file_name = root + "_{}".format(i) + out_file_name += extension + else: + out_file_name = root + "_{}".format(identifier) + extension + + else: + out_file_name = None + + return(out_file_name) + + +def __vprint(message,verbose): + if verbose: + print(message) + + +if __name__ == '__main__': + + # parse arguments + parser = argparse.ArgumentParser(description='Inundate GMS') + parser.add_argument('-y','--hydrofabric_dir', help='Directory path to FIM hydrofabric by processing unit', required=True) + parser.add_argument('-u','--hucs',help='List of HUCS to run',required=False,default=None,type=str,nargs='+') + parser.add_argument('-f','--forecast',help='Forecast discharges in CMS as CSV file',required=True) + parser.add_argument('-i','--inundation-raster',help='Inundation Raster output. Only writes if designated.',required=False,default=None) + parser.add_argument('-p','--inundation-polygon',help='Inundation polygon output. Only writes if designated.',required=False,default=None) + parser.add_argument('-d','--depths-raster',help='Depths raster output. Only writes if designated. Appends HUC code in batch mode.',required=False,default=None) + parser.add_argument('-l','--log-file',help='Log-file to store level-path exceptions',required=False,default=None) + parser.add_argument('-o','--output-fileNames',help='Output CSV file with filenames for inundation rasters, inundation polygons, and depth rasters',required=False,default=None) + parser.add_argument('-w','--num-workers', help='Number of Workers', required=False,default=1) + parser.add_argument('-v','--verbose',help='Verbose printing',required=False,default=None,action='store_true') + + + # extract to dictionary and run + Inundate_gms( **vars(parser.parse_args()) ) + + \ No newline at end of file diff --git a/tools/gms_tools/mosaic_inundation.py b/tools/gms_tools/mosaic_inundation.py new file mode 100644 index 000000000..ea24cb75f --- /dev/null +++ b/tools/gms_tools/mosaic_inundation.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python +# coding: utf-8 + +from glob import glob +from gms_tools.overlapping_inundation import OverlapWindowMerge +import argparse +import os +import pandas as pd +from tqdm import tqdm +from tools_shared_variables import elev_raster_ndv + +def Mosaic_inundation( + map_file,mosaic_attribute='inundation_rasters',mosaic_output=None, + mask=None,unit_attribute_name='huc8', + nodata=elev_raster_ndv,workers=4, + remove_inputs=False, + subset=None,verbose=True + ): + + # check input + if mosaic_attribute not in ('inundation_rasters','depths_rasters'): + raise ValueError('Pass inundation or depths for mosaic_attribute argument') + + # load file + if isinstance(map_file,pd.DataFrame): + inundation_maps_df = map_file + del map_file + elif isinstance(map_file,str): + inundation_maps_df = pd.read_csv(map_file, + dtype={unit_attribute_name:str,'branchID':str} + ) + else: + raise TypeError('Pass Pandas Dataframe or file path string to csv for map_file argument') + + # remove NaNs + inundation_maps_df.dropna(axis=0,how='all',inplace=True) + + # subset + if subset is not None: + subset_mask = inundation_maps_df.loc[:,unit_attribute_name].isin(subset) + inundation_maps_df = inundation_maps_df.loc[subset_mask,:] + + # unique aggregation units + aggregation_units = 
inundation_maps_df.loc[:,unit_attribute_name].unique() + + inundation_maps_df.set_index(unit_attribute_name,drop=True,inplace=True) + + # decide upon wheter to display + if verbose & len(aggregation_units) == 1: + tqdm_disable = False + elif verbose: + tqdm_disable = False + else: + tqdm_disable = True + + for ag in tqdm(aggregation_units,disable=tqdm_disable,desc='Compositing MS and FR maps'): + + try: + inundation_maps_list = inundation_maps_df.loc[ag,mosaic_attribute].tolist() + except AttributeError: + inundation_maps_list = [ inundation_maps_df.loc[ag,mosaic_attribute] ] + + ag_mosaic_output = __append_id_to_file_name(mosaic_output,ag) + #try: + mosaic_by_unit(inundation_maps_list,ag_mosaic_output,nodata, + workers=1,remove_inputs=remove_inputs,mask=mask,verbose=verbose) + #except Exception as exc: + # print(ag,exc) + + + # inundation maps + inundation_maps_df.reset_index(drop=True) + + + +def mosaic_by_unit(inundation_maps_list,mosaic_output,nodata=elev_raster_ndv, + workers=1,remove_inputs=False,mask=None,verbose=False): + + + # overlap object instance + overlap = OverlapWindowMerge( inundation_maps_list, (30, 30) ) + + # mosaic + #if verbose: + # print("Mosaicing ...") + + if mosaic_output is not None: + if workers > 1: + threaded = True + else: + threaded= False + + overlap.merge_rasters(mosaic_output, threaded=threaded, workers=workers,nodata=nodata) + + if mask: + #if verbose: + # print("Masking ...") + overlap.mask_mosaic(mosaic_output,mask,outfile=mosaic_output) + + if remove_inputs: + #if verbose: + # print("Removing inputs ...") + + for inun_map in inundation_maps_list: + if inun_map is not None: + if os.path.isfile(inun_map): + os.remove(inun_map) + + +def __append_id_to_file_name(file_name,identifier): + + + if file_name is not None: + + root,extension = os.path.splitext(file_name) + + if isinstance(identifier,list): + for i in identifier: + out_file_name = root + "_{}".format(i) + out_file_name += extension + else: + out_file_name = root + "_{}".format(identifier) + extension + + else: + out_file_name = None + + return(out_file_name) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Mosaic GMS Inundation Rasters') + parser.add_argument('-i','--map-file', help='List of file paths to inundation/depth maps to mosaic', required=True) + parser.add_argument('-a','--mask', help='File path to vector polygon mask to clip mosaic too', required=False,default=None) + parser.add_argument('-s','--subset', help='Subset units', required=False,default=None,type=str,nargs='+') + parser.add_argument('-n','--nodata', help='Inundation Maps', required=False,default=elev_raster_ndv) + parser.add_argument('-w','--workers', help='Number of Workers', required=False,default=4,type=int) + parser.add_argument('-t','--mosaic-attribute', help='Mosaiced inundation Maps', required=False,default=None) + parser.add_argument('-m','--mosaic-output', help='Mosaiced inundation Maps', required=False,default=None) + parser.add_argument('-r','--remove-inputs', help='Remove original input inundation Maps', required=False,default=False,action='store_true') + parser.add_argument('-v','--verbose', help='Remove original input inundation Maps', required=False,default=False,action='store_true') + + args = vars(parser.parse_args()) + + Mosaic_inundation(**args) \ No newline at end of file diff --git a/tools/gms_tools/overlapping_inundation.py b/tools/gms_tools/overlapping_inundation.py new file mode 100644 index 000000000..5dab3da96 --- /dev/null +++ 
b/tools/gms_tools/overlapping_inundation.py @@ -0,0 +1,505 @@ +#!/usr/bin/env python +# coding: utf-8 + +import rasterio +from rasterio.windows import from_bounds +import numpy as np +from functools import partial +from affine import Affine +from scipy.optimize import newton +from threading import Lock +import concurrent.futures +from numba import njit +import geopandas as gpd +from rasterio.mask import mask +import sys +import warnings + +class OverlapWindowMerge: + + def __init__(self, + inundation_rsts, + num_partitions=None, + window_xy_size=None): + """ + Initialize the object + + :param inundation_rsts: list of inundation paths or datasets + :param num_partitions: tuple of integers representing num windows in x and y space + :param window_xy_size: tuple of integers represeting num of pixels in windows in x an y space + """ + + # sort for largest spanning dataset (todo: handle mismatched resolutions) + size_func = lambda x: np.abs(x.bounds.left - x.bounds.right) * \ + np.abs(x.bounds.top - x.bounds.bottom) + key_sort_func = lambda x: x['size'] + datasets = [rasterio.open(ds) for ds in inundation_rsts] + ds_dict = [{'dataset': ds, 'size': size_func(ds)} for ds in datasets] + ds_dict.sort(key=key_sort_func, reverse=True) + + # load sample overlapping inundation depth rasters + self.depth_rsts = [x['dataset'] for x in ds_dict] + del ds_dict + + self.rst_dims = [[x.height, x.width] for x in self.depth_rsts] + + self.res = self.depth_rsts[0].meta['transform'][0] + self.depth_bounds = np.array([[[x.bounds.top, + x.bounds.left], + [x.bounds.bottom, + x.bounds.right]] for x in self.depth_rsts]) / self.res + + + # get transform, width, height and bounds + self.proc_unit_transform, self.proc_unit_width, \ + self.proc_unit_height, final_bounds = \ + self.get_final_dims() + + self.proc_unit_bounds = np.array([[final_bounds['top'], + final_bounds['left']], + [final_bounds['bottom'], + final_bounds['right']]]) + + self.proc_unit_bounds = self.proc_unit_bounds / self.res + + self.lat_lon_sign = [np.sign(self.proc_unit_bounds[1, 0] - self.proc_unit_bounds[0, 0]), + np.sign(self.proc_unit_bounds[1, 1] - self.proc_unit_bounds[0, 1])] + + self.partitions = num_partitions + self.window_sizes = window_xy_size + + @staticmethod + @njit + def get_res_bbox_min(x, v, z, y): + """ + Optimize for bounds that fit the final resolution + + :param x: float of compare + :param v: float representing min bound + :param z: float representing max bound + :param y: float representing resolution + """ + return np.abs(z - x) - np.round(np.abs(z - v) / y) * y + + def get_final_dims(self): + """ + Get transform, width, height, and bbox of final dataset + + :return: Affine transform, int width, int height, dict bounds + """ + + left = np.min([d.bounds.left for d in self.depth_rsts]) + top = np.max([d.bounds.top for d in self.depth_rsts]) + right = np.max([d.bounds.right for d in self.depth_rsts]) + bottom = np.min([d.bounds.bottom for d in self.depth_rsts]) + + left = newton(self.get_res_bbox_min, left, args=(left, right, self.res)) + bottom = newton(self.get_res_bbox_min, bottom, args=(bottom, top, self.res)) + + transform = self.depth_rsts[0].meta['transform'] + + width = int(np.abs(right - left) / self.res) + height = int(np.abs(top - bottom) / self.res) + new_transform = Affine(transform[0], + transform[1], + left, + transform[3], + transform[4], + top) + + return new_transform, width, height, {'left': left, + 'top': top, + 'right': right, + 'bottom': bottom} + + def get_window_coords(self): + """ + Return ul/br bounds of 
window and its respective window idx + + :param partitions: tuple or list of partition sizes for x and y + :param sizes: tuple or list of pixel sizes for x and y + :return: list of ul/br bounds of window, int of respective window idx + """ + + # Set up desired number of partitions (can also be set pixel size) + if self.partitions is not None: + x_res, y_res = self.partitions + elif self.window_sizes is not None: + x_res, y_res = self.window_sizes + else: + raise('in bran crunch') + + # Get window widths (both normal and edge windows) + window_width1 = np.repeat(int(self.proc_unit_width / x_res), x_res) * self.lat_lon_sign[1] + window_width2 = window_width1.copy() + window_width2[-1] += self.proc_unit_width - window_width1[0] * x_res * self.lat_lon_sign[1] + + # Get window heights (both normal and edge windows) + window_height1 = np.repeat(int(self.proc_unit_height / y_res), y_res) * self.lat_lon_sign[0] + window_height2 = window_height1.copy() + window_height2[-1] += self.proc_unit_height - window_height1[0] * y_res * self.lat_lon_sign[0] + + # Get window sizes (both normal and edge windows) + window_bounds1 = np.flip(np.array(np.meshgrid(window_width1, + window_height1)).T.reshape(-1, 2), + axis=1).astype(np.int) + window_bounds2 = np.flip(np.array(np.meshgrid(window_width2, + window_height2)).T.reshape(-1, 2), + axis=1).astype(np.int) + + window_idx = np.array(np.unravel_index(np.arange(y_res * x_res), (y_res, x_res), order='F')) + + return [window_bounds1, window_bounds2], window_idx + + def create_lat_lons(self, + window_bounds, + window_idx): + """ + Return bbox of window and list of latitudes and longitudes + + :param window_bounds: tuple or list of partition sizes for x and y + :param window_idx: int representing index of window + :return: list of float latitudes, list of float longitudes, list of window bbox, list of ul/br coords for window + """ + + upper_left = (window_idx.T * window_bounds[0]) + lower_right = upper_left + window_bounds[1] + + # Merge point arrays, convert back to original units, and get drawable path for each window + bbox = np.hstack([upper_left, lower_right]) + scaled_path_points = [np.array(np.meshgrid([st[0], st[2]], [st[1], st[3]])).T.reshape(-1, 2) for st in bbox] + path_points = (scaled_path_points + self.proc_unit_bounds[0]) * self.res + + # Create arange of latitudes and longitudes and add half of window size + latitudes = np.arange(self.proc_unit_bounds[0, 0], + self.proc_unit_bounds[1, 0] + self.lat_lon_sign[0], + window_bounds[1][0][0])[:-1] + (window_bounds[1][0][0] / 2) + longitudes = np.arange(self.proc_unit_bounds[0, 1], + self.proc_unit_bounds[1, 1] + self.lat_lon_sign[1], + window_bounds[1][0][1])[:-1] + (window_bounds[1][0][1] / 2) + + return latitudes, longitudes, path_points, bbox + + @staticmethod + def get_window_idx(latitudes, + longitudes, + coords, + partitions): + """ + Return raveled window indices + + :param latitudes: list of latitudes within bounds + :param longitudes: list of longitudes within bounds + + :return: ndarray of raveled multi indexes + """ + # Get difference of upper-left and lower-right boundaries and computed lat lons + lat_dif = [np.abs(latitudes - coords[0, 0]), np.abs(latitudes - coords[1, 0])] + lon_dif = [np.abs(longitudes - coords[0, 1]), np.abs(longitudes - coords[1, 1])] + + # Create range between the closest idx for both lats and lons + lon_range = np.arange(np.argmin(lon_dif[0]), np.argmin(lon_dif[1]) + 1) + lat_range = np.arange(np.argmin(lat_dif[0]), np.argmin(lat_dif[1]) + 1) + + # Create mesh grid for 
each possible set of coords and ravel to get window idx + grid = np.array(np.meshgrid(lat_range, lon_range)).T.reshape(-1, 2) + del lon_range, lat_range, lat_dif, lon_dif + return np.ravel_multi_index([grid[:, 0], grid[:, 1]], partitions, order='F') + + def read_rst_data(self, + win_idx, + datasets, + path_points, + bbox, + meta): + """ + Return data windows and final bounds of window + + :param win_idx: int window index + :param datasets: list of int representing dataset inx + :param path_points: list of bbox for windows + :param bbox: list of ul/br coords of windows + :param meta: metadata for final dataset + + :return: rasterio window object for final window, rasterio window of data window bounds, + data for each raster in window, + """ + # Get window bounding box and get final array output dimensions + window = path_points[win_idx] + window_height, window_width = np.array([np.abs(bbox[win_idx][2] - bbox[win_idx][0]), + np.abs(bbox[win_idx][3] - bbox[win_idx][1])]).astype(np.int) + + bnds = [] + data = [] + for ds in datasets: + # Get rasterio window for each pair of window bounds and depth dataset + + bnd = from_bounds(window[0][1], + window[-1][0], + window[-1][1], + window[0][0], + transform=self.depth_rsts[ds].transform, + height=window_height, + width=window_width) + + bnds.append(bnd) + + # Read raster data with window + read_data = self.depth_rsts[ds].read(1, window=bnd).astype(np.float32) + # Convert all no data to nan values + read_data[read_data == np.float32(self.depth_rsts[ds].meta['nodata'])] = np.nan + data.append(read_data) + del bnd + + final_bnds = from_bounds(window[0][1], + window[-1][0], + window[-1][1], + window[0][0], + transform=meta['transform'], + height=window_height, + width=window_width) + + return [final_bnds, bnds, data] + + + def merge_rasters(self, out_fname, nodata=-9999, threaded=False, workers=4): + """ + Merge multiple raster datasets + + :param out_fname: str path for final merged dataset + :param nodata: int/float representing no data value + """ + + window_bounds, window_idx = self.get_window_coords() + latitudes, longitudes, path_points, bbox = self.create_lat_lons(window_bounds, + window_idx) + + windows = [self.get_window_idx(latitudes, + longitudes, + coords, + self.partitions) + for coords in self.depth_bounds] + + # Create dict with window idx key and dataset idx vals + data_dict = {} + for idx, win in enumerate(windows): + for win_idx in win: + if win_idx in data_dict: + data_dict[win_idx].append(idx) + else: + data_dict[win_idx] = [idx] + + agg_function = partial(np.nanmax, axis=0) + + meta = self.depth_rsts[0].meta + + meta.update(transform=self.proc_unit_transform, + width=self.proc_unit_width, + height=self.proc_unit_height, + nodata=nodata,blockxsize=256, + blockysize=256, tiled=True, + compress='lzw') + + final_windows, data_windows, data = [], [], [] + + def __data_generator(data_dict,path_points,bbox,meta): + + for key, val in data_dict.items(): + + f_window, window, dat = self.read_rst_data(key, + val, + path_points, + bbox, + meta + ) + yield(dat, window, f_window, val) + #final_windows.append(f_window) + #data_windows.append(window) + #data.append(dat) + #del f_window, window, dat + + # create data generator + dgen = __data_generator(data_dict,path_points,bbox,meta) + + lock = Lock() + + with rasterio.open(out_fname, 'w', **meta) as rst: + + merge_partial = partial(merge_data, + rst=rst, + lock=lock, + dtype=meta['dtype'], + agg_function=agg_function, + nodata=meta['nodata'], + rst_dims=self.rst_dims) + + if not threaded: + #for 
d, dw, fw, ddict in zip(data, + # data_windows, + # final_windows, + # data_dict.values()): + for d, dw, fw, ddict in dgen: + merge_partial(d, dw, fw, ddict) + else: + with concurrent.futures.ThreadPoolExecutor( + max_workers=workers + ) as executor: + executor.map(merge_partial, + data, + data_windows, + final_windows, + data_dict.values() + ) + + def mask_mosaic(self,mosaic,polys,polys_layer=None,outfile=None): + + #rem_array,window_transform = mask(rem,[shape(huc['geometry'])],crop=True,indexes=1) + + # input rem + if isinstance(mosaic,str): + mosaic = rasterio.open(mosaic) + elif isinstance(mosaic,rasterio.DatasetReader): + pass + else: + raise TypeError("Pass rasterio dataset or filepath for mosaic") + + if isinstance(polys,str): + polys=gpd.read_file(polys,layer=polys_layer) + elif isinstance(polys,gpd.GeoDataFrame): + pass + else: + raise TypeError("Pass geopandas dataset or filepath for catchment polygons") + + #fossid = huc['properties']['fossid'] + #if polys.HydroID.dtype != 'str': polys.HydroID = polys.HydroID.astype(str) + #polys=polys[polys.HydroID.str.startswith(fossid)] + mosaic_array, window_transform = mask(mosaic,polys['geometry'],crop=True,indexes=1) + + if outfile: + out_profile = mosaic.profile + out_profile.update(height=mosaic_array.shape[0],width=mosaic_array.shape[1], + transform = window_transform, driver= 'GTiff', + blockxsize=256, blockysize=256, tiled=True, compress='lzw') + + with rasterio.open(outfile,'w',**out_profile) as otfi: + otfi.write(mosaic_array,indexes=1) + + return(mosaic_array,out_profile) + +# Quasi multi write +# Throughput achieved assuming processing time is not identical between windows +# and queued datasets, preferably approx N/2 threads for 9 windows +# @njit +def merge_data(rst_data, + window_bnds, + final_window, + datasets, + dtype, + rst, + lock, + agg_function, + nodata, + rst_dims + ): + """ + Merge data in to final dataset (multi threaded) + + :param rst_data: list of rst data from window + :param window_bnds: list rasterio windows representing data window bounds + :param final_window: rasterio window representing final window bounds + :param datasets: list of int representing dataset idx + :param dtype: data type of final output + :param rst: rasterio writer for final dataset + :param lock: thread concurrency lock + :param agg_function: function to aggregate datasets + :param nodata: nodata of final output + :param rst_dims: dimensions of overlapping rasters + """ + + nan_tile = np.array([np.nan]).astype(dtype)[0] + window_data = np.tile(float(nan_tile), [int(final_window.height), int(final_window.width)]) + + for data, bnds, idx in zip(rst_data, window_bnds, datasets): + # Get indices to apply to base + + col_slice = slice(int(np.max([0, + np.ceil(bnds.col_off * -1)])), + int(np.min([bnds.width, + rst_dims[idx][1] - bnds.col_off]))) + + row_slice = slice(int(np.max([0, + np.ceil(bnds.row_off * -1)])), + int(np.min([bnds.height, + rst_dims[idx][0] - bnds.row_off]))) + + win_shape = window_data[row_slice, col_slice].shape + + if not np.all(np.sign(np.array(win_shape) - np.array(data.shape)) > 0): + data = data[:win_shape[0], :win_shape[1]] + # Assign the data to the base array with aggregate function + merge = [window_data[row_slice, + col_slice], + data] + + del data + with warnings.catch_warnings(): + # This `with` block supresses the RuntimeWarning thrown by numpy when aggregating nan values + warnings.simplefilter("ignore", category=RuntimeWarning) + window_data[row_slice, col_slice] = agg_function(merge) + 
window_data[np.isnan(window_data)] = nodata + del merge + + del rst_data, window_bnds, datasets + + window_data[(window_data == nan_tile) | (np.isnan(window_data))] = nodata + + with lock: + rst.write_band(1, window_data.astype(dtype), window=final_window) + del window_data + + +if __name__ == '__main__': + import time + # import tracemalloc + import glob + + # print('start', time.localtime()) + # project_path = r'../documentation/data' + # overlap = OverlapWindowMerge([project_path + '/overlap1.tif', + # project_path + '/overlap2.tif', + # project_path + '/overlap3.tif', + # ], + # (3, 3)) + # overlap.merge_rasters(project_path + '/merged_overlap.tif', nodata=0) + # print('end', time.localtime()) + + # tracemalloc.start() + print('start', time.localtime()) + # project_path = r'../documentation/data' + # project_path = '*/mosaicing_data/1_fr_ms_composite' + # overlap = OverlapWindowMerge([project_path + '/inundation_extent_12090301_FR.tif', + # project_path + '/inundation_extent_12090301_MS.tif' + # ], + # (30, 30)) + # overlap.merge_rasters(project_path + '/merged_final5.tif', threaded=True, workers=4, nodata=0) + + # tracemalloc.start() + print('start', time.localtime()) + # project_path = r'../documentation/data' + # project_path = '*/mosaicing_data/2_gms' + # a = glob.glob(project_path + '/inundation*.tif') + # overlap = OverlapWindowMerge(a, + # (30, 30)) + # overlap.merge_rasters(project_path + '/merged_final5.tif', threaded=True, workers=4, nodata=-2e9) + # current, peak = tracemalloc.get_traced_memory() + # print(f"Current memory usage is {current / 10 ** 6}MB; Peak was {peak / 10 ** 6}MB") + # tracemalloc.stop() + + project_path = '*' + overlap = OverlapWindowMerge([project_path + '/nwm_resampled.tif', + project_path + '/rnr_inundation_031403_2020092000.tif' + ], + (1, 1)) + overlap.merge_rasters(project_path + '/merged_final5.tif', threaded=False, workers=4) + + print('end', time.localtime()) \ No newline at end of file diff --git a/tools/inundate_nation.py b/tools/inundate_nation.py new file mode 100644 index 000000000..6fbcdf146 --- /dev/null +++ b/tools/inundate_nation.py @@ -0,0 +1,102 @@ +import argparse +import os + +from inundation import inundate +from multiprocessing import Pool + +INUN_REVIEW_DIR = r'/data/inundation_review/inundation_nwm_recurr/' +INPUTS_DIR = r'/data/inputs' + + +def run_inundation(args): + """ + This script is basically a wrapper for the inundate function and is designed for multiprocessing. + + Args: + args (list): [fim_run_dir (str), huc (str), magnitude (str), magnitude_output_dir (str), config (str)] + + """ + + fim_run_dir = args[0] + huc = args[1] + magnitude = args[2] + magnitude_output_dir = args[3] + config = args[4] + + # Define file paths for use in inundate(). 
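# Illustrative sketch (not part of the original patch; paths are hypothetical): the
# positional `args` list unpacked above mirrors what __main__ appends to procs_list,
# for example:
#   run_inundation(['/data/outputs/fim_ms_run', '12090301', '1_5',
#                   '/data/inundation_review/inundation_nwm_recurr/fim_ms_run/1_5_ms', 'ms'])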
+ fim_run_parent = os.path.join(fim_run_dir, huc) + rem = os.path.join(fim_run_parent, 'rem_zeroed_masked.tif') + catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes.tif') + mask_type = 'huc' + catchment_poly = '' + hydro_table = os.path.join(fim_run_parent, 'hydroTable.csv') + catchment_poly = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') + inundation_raster = os.path.join(magnitude_output_dir, magnitude + '_' + config + '_inund_extent.tif') + depth_raster = os.path.join(magnitude_output_dir, magnitude + '_' + config + '_inund_depth.tif') + forecast = os.path.join(INUN_REVIEW_DIR, 'nwm_recurr_flow_data', 'recurr_' + magnitude + '_cms.csv') + hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' + + # Run inundate() once for depth and once for extent. + if not os.path.exists(depth_raster): + print("Running the NWM recurrence intervals for HUC: " + huc + ", " + magnitude + "...") + inundate( + rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=None,inundation_polygon=None, + depths=depth_raster,out_raster_profile=None,out_vector_profile=None,quiet=True + ) + + if not os.path.exists(inundation_raster): + inundate( + rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, + depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True + ) + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Inundation mapping for FOSS FIM using streamflow recurrence interflow data. Inundation outputs are stored in the /inundation_review/inundation_nwm_recurr/ directory.') + parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh (e.g. data/ouputs/dev_abc/12345678_dev_test)',required=True) + parser.add_argument('-o', '--output-dir',help='The path to a directory to write the outputs. If not used, the inundation_review directory is used by default -> type=str',required=False, default="") + parser.add_argument('-j', '--job-number',help='The number of jobs',required=False,default=1) + + args = vars(parser.parse_args()) + + fim_run_dir = args['fim_run_dir'] + output_dir = args['output_dir'] + magnitude_list = ['1_5'] + + job_number = int(args['job_number']) + + huc_list = os.listdir(fim_run_dir) + + fim_version = os.path.split(fim_run_dir)[1] + + if output_dir == "": + output_dir = os.path.join(INUN_REVIEW_DIR, fim_version) + + if not os.path.exists(output_dir): + os.mkdir(output_dir) + + if 'ms' in fim_version: + config = 'ms' + if 'fr' in fim_version: + config = 'fr' + + procs_list = [] + + for huc in huc_list: + if huc != 'logs': + for magnitude in magnitude_list: + magnitude_output_dir = os.path.join(output_dir, magnitude + '_' + config) + if not os.path.exists(magnitude_output_dir): + os.mkdir(magnitude_output_dir) + print(magnitude_output_dir) + procs_list.append([fim_run_dir, huc, magnitude, magnitude_output_dir, config]) + + # Multiprocess. 
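# Illustrative note (not part of the original patch): the block below submits jobs only
# when job_number > 1; a minimal serial fallback, if one were wanted, could look like:
#   else:
#       for p in procs_list:
#           run_inundation(p)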
+ if job_number > 1: + with Pool(processes=job_number) as pool: + pool.map(run_inundation, procs_list) + diff --git a/tests/inundation.py b/tools/inundation.py similarity index 74% rename from tests/inundation.py rename to tools/inundation.py index a07f8d96f..2b5a4599c 100755 --- a/tests/inundation.py +++ b/tools/inundation.py @@ -2,29 +2,35 @@ import numpy as np import pandas as pd -from numba import njit, typeof, typed, types +from numba import njit, typed, types from concurrent.futures import ThreadPoolExecutor,as_completed from subprocess import run from os.path import splitext import rasterio import fiona -import shapely from shapely.geometry import shape -from fiona.crs import to_string -from rasterio.errors import WindowError from rasterio.mask import mask from rasterio.io import DatasetReader,DatasetWriter -from rasterio.features import shapes,geometry_window,dataset_features -from rasterio.windows import transform,Window +from rasterio.features import shapes from collections import OrderedDict import argparse from warnings import warn from gdal import BuildVRT import geopandas as gpd + +class hydroTableHasOnlyLakes(Exception): + """ Raised when a Hydro-Table only has lakes """ + pass + + +class NoForecastFound(Exception): + """ Raised when no forecast is available for a given Hydro-Table """ + pass + def inundate( rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=None,hucs_layerName=None, subset_hucs=None,num_workers=1,aggregate=False,inundation_raster=None,inundation_polygon=None, - depths=None,out_raster_profile=None,out_vector_profile=None,quiet=False + depths=None,out_raster_profile=None,out_vector_profile=None,src_table=None,quiet=False ): """ @@ -157,59 +163,65 @@ def inundate( else: raise TypeError("Pass hydro table csv") - # make windows generator - window_gen = __make_windows_generator(rem,catchments,catchment_poly,mask_type,catchmentStagesDict,inundation_raster,inundation_polygon, - depths,out_raster_profile,out_vector_profile,quiet,hucs=hucs,hucSet=hucSet) + if catchmentStagesDict is not None: + if src_table is not None: + create_src_subset_csv(hydro_table,catchmentStagesDict,src_table) - # start up thread pool - executor = ThreadPoolExecutor(max_workers=num_workers) + # make windows generator + window_gen = __make_windows_generator(rem,catchments,catchment_poly,mask_type,catchmentStagesDict,inundation_raster,inundation_polygon, + depths,out_raster_profile,out_vector_profile,quiet,hucs=hucs,hucSet=hucSet) - # submit jobs - results = {executor.submit(__inundate_in_huc,*wg) : wg[6] for wg in window_gen} + # start up thread pool + executor = ThreadPoolExecutor(max_workers=num_workers) - inundation_rasters = [] ; depth_rasters = [] ; inundation_polys = [] - for future in as_completed(results): - try: - future.result() - except Exception as exc: - __vprint("Exception {} for {}".format(exc,results[future]),not quiet) - else: + # submit jobs + results = {executor.submit(__inundate_in_huc,*wg) : wg[6] for wg in window_gen} - if results[future] is not None: - __vprint("... {} complete".format(results[future]),not quiet) + inundation_rasters = [] ; depth_rasters = [] ; inundation_polys = [] + for future in as_completed(results): + try: + future.result() + except Exception as exc: + __vprint("Exception {} for {}".format(exc,results[future]),not quiet) else: - __vprint("... 
complete",not quiet) - - inundation_rasters += [future.result()[0]] - depth_rasters += [future.result()[1]] - inundation_polys += [future.result()[2]] - - # power down pool - executor.shutdown(wait=True) - - # optional aggregation - if (aggregate) & (hucs is not None): - # inun grid vrt - if inundation_raster is not None: - inun_vrt = BuildVRT(splitext(inundation_raster)[0]+'.vrt',inundation_rasters) - inun_vrt = None - #_ = run('gdalbuildvrt -q -overwrite {} {}'.format(splitext(inundation_raster)[0]+'.vrt'," ".join(inundation_rasters)),shell=True) - # depths vrt - if depths is not None: - depths_vrt = BuildVRT(splitext(depths)[0]+'.vrt',depth_rasters,resampleAlg='bilinear') - depths_vrt = None - #_ = run('gdalbuildvrt -q -overwrite -r bilinear {} {}'.format(splitext(depths)[0]+'.vrt'," ".join(depth_rasters)),shell=True) - - # concat inun poly - if inundation_polygon is not None: - _ = run('ogrmerge.py -o {} {} -f GPKG -single -overwrite_ds'.format(inundation_polygon," ".join(inundation_polys)),shell=True) - - # close datasets - rem.close() - catchments.close() - return(0) + if results[future] is not None: + __vprint("... {} complete".format(results[future]),not quiet) + else: + __vprint("... complete",not quiet) + + inundation_rasters += [future.result()[0]] + depth_rasters += [future.result()[1]] + inundation_polys += [future.result()[2]] + + # power down pool + executor.shutdown(wait=True) + + # optional aggregation + if (aggregate) & (hucs is not None): + # inun grid vrt + if inundation_raster is not None: + inun_vrt = BuildVRT(splitext(inundation_raster)[0]+'.vrt',inundation_rasters) + inun_vrt = None + #_ = run('gdalbuildvrt -q -overwrite {} {}'.format(splitext(inundation_raster)[0]+'.vrt'," ".join(inundation_rasters)),shell=True) + # depths vrt + if depths is not None: + depths_vrt = BuildVRT(splitext(depths)[0]+'.vrt',depth_rasters,resampleAlg='bilinear') + depths_vrt = None + #_ = run('gdalbuildvrt -q -overwrite -r bilinear {} {}'.format(splitext(depths)[0]+'.vrt'," ".join(depth_rasters)),shell=True) + + # concat inun poly + if inundation_polygon is not None: + _ = run('ogrmerge.py -o {} {} -f GPKG -single -overwrite_ds'.format(inundation_polygon," ".join(inundation_polys)),shell=True) + + # close datasets + rem.close() + catchments.close() + + return(0) + else: + return(1) def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profile,catchments_profile,hucCode, catchmentStagesDict,depths,inundation_raster,inundation_polygon, @@ -329,6 +341,7 @@ def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profil if isinstance(depths,DatasetWriter): depths.close() if isinstance(inundation_raster,DatasetWriter): inundation_raster.close() if isinstance(inundation_polygon,fiona.Collection): inundation_polygon.close() + #if isinstance(hucs,fiona.Collection): inundation_polygon.close() # return file names of outputs for aggregation. 
Handle Nones try: @@ -400,7 +413,7 @@ def __return_huc_in_hucSet(hucCode,hucSet): rem_array,window_transform = mask(rem,[shape(huc['geometry'])],crop=True,indexes=1) catchments_array,_ = mask(catchments,[shape(huc['geometry'])],crop=True,indexes=1) elif mask_type == "filter": - + # input catchments polygon if isinstance(catchment_poly,str): catchment_poly=gpd.read_file(catchment_poly) @@ -415,6 +428,7 @@ def __return_huc_in_hucSet(hucCode,hucSet): rem_array,window_transform = mask(rem,catchment_poly['geometry'],crop=True,indexes=1) catchments_array,_ = mask(catchments,catchment_poly['geometry'],crop=True,indexes=1) + del catchment_poly else: print ("invalid mask type. Options are 'huc' or 'filter'") except ValueError: # shape doesn't overlap raster @@ -456,75 +470,97 @@ def __subset_hydroTable_to_forecast(hydroTable,forecast,subset_hucs=None): 'HydroID':str,'stage':float, 'discharge_cms':float,'LakeID' : int} ) + huc_error = hydroTable.HUC.unique() hydroTable.set_index(['HUC','feature_id','HydroID'],inplace=True) - hydroTable = hydroTable[hydroTable["LakeID"] == -999] # Subset hydroTable to include only non-lake catchments. + elif isinstance(hydroTable,pd.DataFrame): pass #consider checking for correct dtypes, indices, and columns else: raise TypeError("Pass path to hydro-table csv or Pandas DataFrame") - if isinstance(forecast,str): - forecast = pd.read_csv( - forecast, - dtype={'feature_id' : str , 'discharge' : float} - ) - forecast.set_index('feature_id',inplace=True) - elif isinstance(forecast,pd.DataFrame): - pass # consider checking for dtypes, indices, and columns - else: - raise TypeError("Pass path to forecast file csv or Pandas DataFrame") - + hydroTable = hydroTable[hydroTable["LakeID"] == -999] # Subset hydroTable to include only non-lake catchments. 
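# Illustrative sketch (not part of the original patch): downstream of this subset, each
# HydroID's inundation stage is found by interpolating the forecast discharge onto the
# synthetic rating curve, conceptually:
#   stage = np.interp(q_forecast, sub_table['discharge_cms'], sub_table['stage'])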
- # susbset hucs if passed - if subset_hucs is not None: - if isinstance(subset_hucs,list): - if len(subset_hucs) == 1: - try: - subset_hucs = open(subset_hucs[0]).read().split('\n') - except FileNotFoundError: - pass - elif isinstance(subset_hucs,str): - try: - subset_hucs = open(subset_hucs).read().split('\n') - except FileNotFoundError: - subset_hucs = [subset_hucs] + if not hydroTable.empty: - # subsets HUCS - subset_hucs_orig = subset_hucs.copy() ; subset_hucs = [] - for huc in np.unique(hydroTable.index.get_level_values('HUC')): - for sh in subset_hucs_orig: - if huc.startswith(sh): - subset_hucs += [huc] - - hydroTable = hydroTable[np.in1d(hydroTable.index.get_level_values('HUC'), subset_hucs)] + if isinstance(forecast,str): + forecast = pd.read_csv( + forecast, + dtype={'feature_id' : str , 'discharge' : float} + ) + forecast.set_index('feature_id',inplace=True) + elif isinstance(forecast,pd.DataFrame): + pass # consider checking for dtypes, indices, and columns + else: + raise TypeError("Pass path to forecast file csv or Pandas DataFrame") + + # susbset hucs if passed + if subset_hucs is not None: + if isinstance(subset_hucs,list): + if len(subset_hucs) == 1: + try: + subset_hucs = open(subset_hucs[0]).read().split('\n') + except FileNotFoundError: + pass + elif isinstance(subset_hucs,str): + try: + subset_hucs = open(subset_hucs).read().split('\n') + except FileNotFoundError: + subset_hucs = [subset_hucs] + + # subsets HUCS + subset_hucs_orig = subset_hucs.copy() ; subset_hucs = [] + for huc in np.unique(hydroTable.index.get_level_values('HUC')): + for sh in subset_hucs_orig: + if huc.startswith(sh): + subset_hucs += [huc] + + hydroTable = hydroTable[np.in1d(hydroTable.index.get_level_values('HUC'), subset_hucs)] + + # join tables + try: + hydroTable = hydroTable.join(forecast,on=['feature_id'],how='inner') - # join tables - hydroTable = hydroTable.join(forecast,on=['feature_id'],how='inner') - # initialize dictionary - catchmentStagesDict = typed.Dict.empty(types.int32,types.float64) + # initialize dictionary + catchmentStagesDict = typed.Dict.empty(types.int32,types.float64) - # interpolate stages - for hid,sub_table in hydroTable.groupby(level='HydroID'): + # interpolate stages + for hid,sub_table in hydroTable.groupby(level='HydroID'): - interpolated_stage = np.interp(sub_table.loc[:,'discharge'].unique(),sub_table.loc[:,'discharge_cms'],sub_table.loc[:,'stage']) + interpolated_stage = np.interp(sub_table.loc[:,'discharge'].unique(),sub_table.loc[:,'discharge_cms'],sub_table.loc[:,'stage']) - # add this interpolated stage to catchment stages dict - h = round(interpolated_stage[0],4) + # add this interpolated stage to catchment stages dict + h = round(interpolated_stage[0],4) - hid = types.int32(hid) ; h = types.float32(h) - catchmentStagesDict[hid] = h + hid = types.int32(hid) ; h = types.float32(h) + catchmentStagesDict[hid] = h - # huc set - hucSet = [str(i) for i in hydroTable.index.get_level_values('HUC').unique().to_list()] + # huc set + hucSet = [str(i) for i in hydroTable.index.get_level_values('HUC').unique().to_list()] - return(catchmentStagesDict,hucSet) + return(catchmentStagesDict,hucSet) + except AttributeError: + print (f"No matching feature IDs between forecast and hydrotable for HUC(s): {subset_hucs}") + return(None,None) + else: + print(f"All stream segments in HUC(s): {huc_error} are within lake boundaries.") + return(None,None) def __vprint(message,verbose): if verbose: print(message) +def create_src_subset_csv(hydro_table,catchmentStagesDict,src_table): + 
src_df = pd.DataFrame.from_dict(catchmentStagesDict, orient='index') + src_df.reset_index(inplace=True) + src_df.columns = ['HydroID','stage_inund'] + df_htable = pd.read_csv(hydro_table,dtype={'HydroID': int}) + df_htable = df_htable.merge(src_df,how='left',on='HydroID') + df_htable['find_match'] = (df_htable['stage'] - df_htable['stage_inund']).abs() + df_htable = df_htable.loc[df_htable.groupby('HydroID')['find_match'].idxmin()].reset_index(drop=True) + df_htable.to_csv(src_table,index=False) + if __name__ == '__main__': @@ -544,6 +580,7 @@ def __vprint(message,verbose): parser.add_argument('-i','--inundation-raster',help='Inundation Raster output. Only writes if designated. Appends HUC code in batch mode.',required=False,default=None) parser.add_argument('-p','--inundation-polygon',help='Inundation polygon output. Only writes if designated. Appends HUC code in batch mode.',required=False,default=None) parser.add_argument('-d','--depths',help='Depths raster output. Only writes if designated. Appends HUC code in batch mode.',required=False,default=None) + parser.add_argument('-n','--src-table',help='Output table with the SRC lookup/interpolation. Only writes if designated. Appends HUC code in batch mode.',required=False,default=None) parser.add_argument('-q','--quiet',help='Quiet terminal output',required=False,default=False,action='store_true') # extract to dictionary diff --git a/tools/inundation_wrapper_custom_flow.py b/tools/inundation_wrapper_custom_flow.py new file mode 100755 index 000000000..72ce20bf0 --- /dev/null +++ b/tools/inundation_wrapper_custom_flow.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 + +# Created: 1/11/2021 +# Primary developer(s): ryan.spies@noaa.gov +# Purpose: This script provides the user to input a customized flow entry to produce +# inundation outputs using outputs from fim_run. Note that the flow csv must be +# formatted with "feature_id" & "discharge" columns. Flow must be in cubic m/s + +import os +import sys +import argparse +import shutil +from inundation import inundate + +TEST_CASES_DIR = r'/data/inundation_review/inundation_custom_flow/' # Will update. +INPUTS_DIR = r'/data/inputs' +OUTPUTS_DIR = os.environ['outputDataDir'] + +ENDC = '\033[m' +TGREEN_BOLD = '\033[32;1m' +TGREEN = '\033[32m' +TRED_BOLD = '\033[31;1m' +TWHITE = '\033[37m' +WHITE_BOLD = '\033[37;1m' +CYAN_BOLD = '\033[36;1m' + +def run_recurr_test(fim_run_dir, branch_name, huc_id, input_flow_csv, mask_type='huc'): + + # Construct paths to development test results if not existent. + huc_id_dir_parent = os.path.join(TEST_CASES_DIR, huc_id) + if not os.path.exists(huc_id_dir_parent): + os.mkdir(huc_id_dir_parent) + branch_test_case_dir_parent = os.path.join(TEST_CASES_DIR, huc_id, branch_name) + + # Delete the entire directory if it already exists. + if os.path.exists(branch_test_case_dir_parent): + shutil.rmtree(branch_test_case_dir_parent) + + print("Running the NWM recurrence intervals for HUC: " + huc_id + ", " + branch_name + "...") + + assert os.path.exists(fim_run_dir), "Cannot locate " + fim_run_dir + + # Create paths to fim_run outputs for use in inundate(). 
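# Illustrative sketch (not part of the original patch; values are hypothetical): the
# user-supplied flow file passed to inundate() needs "feature_id" and "discharge"
# columns, with discharge in cms, e.g.:
#   feature_id,discharge
#   5791828,120.5
#   5791830,98.2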
+ if "previous_fim" in fim_run_dir and "fim_2" in fim_run_dir: + rem = os.path.join(fim_run_dir, 'rem_clipped_zeroed_masked.tif') + catchments = os.path.join(fim_run_dir, 'gw_catchments_reaches_clipped_addedAttributes.tif') + if not os.path.isfile(rem): + print('Can not find REM file: ' + str(rem)) + if not os.path.isfile(catchments): + print('Can not find catchments file: ' + str(catchments)) + else: + rem = os.path.join(fim_run_dir, 'rem_zeroed_masked.tif') + catchments = os.path.join(fim_run_dir, 'gw_catchments_reaches_filtered_addedAttributes.tif') + if not os.path.isfile(rem): + print('Can not find REM file: ' + str(rem)) + if not os.path.isfile(catchments): + print('Can not find catchments file: ' + str(catchments)) + if mask_type == 'huc': + catchment_poly = '' + print('Not using the catchment polygon input layer -- FIM version < 3??') + else: + catchment_poly = os.path.join(fim_run_dir, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') + if not os.path.isfile(catchment_poly): + print('Can not find catchments polygon file: ' + str(catchments)) + hydro_table = os.path.join(fim_run_dir, 'hydroTable.csv') + if not os.path.isfile(hydro_table): + print('Can not find hydro_table file: ' + str(hydro_table)) + + #benchmark_category = huc_id.split('_')[1] + current_huc = huc_id.split('_')[0] # Break off HUC ID and assign to variable. + wbd_huc = 'WBDHU' + str(len(huc_id)) # check if the input huc is 2,4,6,8 etc + + # Map necessary inputs for inundation(). + hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), wbd_huc + + if not os.path.exists(branch_test_case_dir_parent): + os.mkdir(branch_test_case_dir_parent) + + + #branch_test_case_dir = os.path.join(branch_test_case_dir_parent) + + #os.makedirs(branch_test_case_dir) # Make output directory for branch. + + # Define paths to inundation_raster and forecast file. + inundation_raster = os.path.join(branch_test_case_dir_parent, branch_name + '_inund_extent.tif') + forecast = os.path.join(TEST_CASES_DIR,"_input_flow_files", input_flow_csv) + if not os.path.isfile(forecast): + print('Can not find input flow file: ' + str(forecast)) + + # Copy forecast flow file into the outputs directory to all viewer to reference the flows used to create inundation_raster + shutil.copyfile(forecast,os.path.join(branch_test_case_dir_parent,input_flow_csv)) + + # Run inundate. + print("-----> Running inundate() to produce modeled inundation extent for the " + input_flow_csv) + inundate( + rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, + depths=None,out_raster_profile=None,out_vector_profile=None,quiet=False + ) + + print("-----> Inundation mapping complete.") + if not os.path.isfile(inundation_raster): + print('Warning!! Inundation Raster not produced: ' + str(inundation_raster)) + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Inundation mapping for FOSS FIM using a user supplied flow data file. Inundation outputs are stored in the /inundation_review/inundation_custom_flow/ directory.') + parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh (e.g. 
data/ouputs/dev_abc/12345678_dev/12345678)',required=True) + parser.add_argument('-b', '--branch-name',help='The name of the working branch in which features are being tested (used to name the output inundation directory) -> type=str',required=True,default="") + parser.add_argument('-t', '--huc-id',help='The huc id to use (single huc). Format as: xxxxxxxx, e.g. 12345678',required=True,default="") + parser.add_argument('-m', '--mask-type', help='Optional: specify \'huc\' (FIM < 3) or \'filter\' (FIM >= 3) masking method', required=False,default="huc") + parser.add_argument('-y', '--input-flow-csv',help='Filename of the user generated (customized) csv. Must contain nwm feature ids and flow value(s) (units: cms) --> put this file in the "_input_flow_files" directory',required=True, default="") + + + # Extract to dictionary and assign to variables. + args = vars(parser.parse_args()) + + valid_huc_id_list = ['nwm_recurr'] + + exit_flag = False # Default to False. + print() + + # Ensure fim_run_dir exists. + if not os.path.exists(args['fim_run_dir']): + print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided fim_run_dir (-r) " + CYAN_BOLD + args['fim_run_dir'] + WHITE_BOLD + " could not be located in the 'outputs' directory." + ENDC) + print(WHITE_BOLD + "Please provide the parent directory name for fim_run.sh outputs. These outputs are usually written in a subdirectory, e.g. data/outputs/123456/123456." + ENDC) + print() + exit_flag = True + + + if args['input_flow_csv'] == '': + print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided input_flow_csv (-y) " + CYAN_BOLD + args['input_flow_csv'] + WHITE_BOLD + " is not provided. Please provide a csv file with nwm featureid and flow values" + ENDC) + exit_flag = True + + + if exit_flag: + print() + sys.exit() + + + else: + + run_recurr_test(**args) diff --git a/tools/inundation_wrapper_nwm_flows.py b/tools/inundation_wrapper_nwm_flows.py new file mode 100755 index 000000000..8af5462f4 --- /dev/null +++ b/tools/inundation_wrapper_nwm_flows.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 + +# Created: 1/10/2021 +# Primary developer(s): ryan.spies@noaa.gov +# Purpose: This script provides the user to generate inundation outputs using +# the NWM Recurrence Interval flow data for 1.5yr, 5yr, & 10yr events. + +import os +import sys +import csv +import argparse +import shutil +from inundation import inundate + +INUN_REVIEW_DIR = r'/data/inundation_review/inundation_nwm_recurr/' # Will update. +INPUTS_DIR = r'/data/inputs' +OUTPUTS_DIR = os.environ['outputDataDir'] + +ENDC = '\033[m' +TGREEN_BOLD = '\033[32;1m' +TGREEN = '\033[32m' +TRED_BOLD = '\033[31;1m' +TWHITE = '\033[37m' +WHITE_BOLD = '\033[37;1m' +CYAN_BOLD = '\033[36;1m' + +def run_recurr_test(fim_run_dir, branch_name, huc_id, magnitude, mask_type='huc', output_dir=None): + + # Construct paths to development test results if not existent. + huc_id_dir_parent = os.path.join(INUN_REVIEW_DIR, huc_id) + if not os.path.exists(huc_id_dir_parent): + os.mkdir(huc_id_dir_parent) + + if output_dir == None: + branch_test_case_dir_parent = os.path.join(INUN_REVIEW_DIR, huc_id, branch_name) + else: + branch_test_case_dir_parent = os.path.join(output_dir, huc_id, branch_name) + + # Delete the entire directory if it already exists. 
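# Illustrative note (not part of the original patch): for a magnitude of '1_5', the
# forecast path assembled further down in this function resolves to
#   /data/inundation_review/inundation_nwm_recurr/nwm_recurr_flow_data/recurr_1_5_cms.csv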
+ if os.path.exists(branch_test_case_dir_parent): + shutil.rmtree(branch_test_case_dir_parent) + + print("Running the NWM recurrence intervals for huc_id: " + huc_id + ", " + branch_name + "...") + + fim_run_parent = os.path.join(fim_run_dir) + assert os.path.exists(fim_run_parent), "Cannot locate " + fim_run_parent + + # Create paths to fim_run outputs for use in inundate(). + if "previous_fim" in fim_run_parent and "fim_2" in fim_run_parent: + rem = os.path.join(fim_run_parent, 'rem_clipped_zeroed_masked.tif') + catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_clipped_addedAttributes.tif') + else: + rem = os.path.join(fim_run_parent, 'rem_zeroed_masked.tif') + catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes.tif') + if mask_type == 'huc': + catchment_poly = '' + else: + catchment_poly = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') + hydro_table = os.path.join(fim_run_parent, 'hydroTable.csv') + + # Map necessary inputs for inundation(). + hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' + + #benchmark_category = huc_id.split('_')[1] + current_huc = huc_id.split('_')[0] # Break off HUC ID and assign to variable. + + if not os.path.exists(branch_test_case_dir_parent): + os.mkdir(branch_test_case_dir_parent) + + # Check if magnitude is list of magnitudes or single value. + magnitude_list = magnitude + if type(magnitude_list) != list: + magnitude_list = [magnitude_list] + + for magnitude in magnitude_list: + # Construct path to validation raster and forecast file. + + branch_test_case_dir = os.path.join(branch_test_case_dir_parent, magnitude) + + os.makedirs(branch_test_case_dir) # Make output directory for branch. + + # Define paths to inundation_raster and forecast file. + inundation_raster = os.path.join(branch_test_case_dir, branch_name + '_inund_extent.tif') + forecast = os.path.join(INUN_REVIEW_DIR, 'nwm_recurr_flow_data', 'recurr_' + magnitude + '_cms.csv') + + # Run inundate. + print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...") + inundate( + rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, + depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True + ) + + print("-----> Inundation mapping complete.") + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Inundation mapping for FOSS FIM using streamflow recurrence interflow data. Inundation outputs are stored in the /inundation_review/inundation_nwm_recurr/ directory.') + parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh (e.g. data/ouputs/dev_abc/12345678_dev_test)',required=True) + parser.add_argument('-b', '--branch-name',help='The name of the working branch in which features are being tested (used to name the output inundation directory) -> type=str',required=True,default="") + parser.add_argument('-t', '--huc-id',help='Provide either a single hucid (Format as: xxxxxxxx, e.g. 
12345678) or a filepath to a list of hucids',required=True,default="") + parser.add_argument('-m', '--mask-type', help='Optional: specify \'huc\' (FIM < 3) or \'filter\' (FIM >= 3) masking method', required=False,default="huc") + parser.add_argument('-y', '--magnitude',help='The magnitude (reccur interval) to run. Leave blank to use default intervals (options: 1_5, 5_0, 10_0).',required=False, default="") + + + # Extract to dictionary and assign to variables. + args = vars(parser.parse_args()) + + valid_test_id_list = ['nwm_recurr'] + + exit_flag = False # Default to False. + print() + + # check if user provided a single huc_id or a file path to a list of huc ids + if args['huc_id'].isdigit(): + huc_list = [args['huc_id']] + elif os.path.exists(args['huc_id']): # check if provided str is a valid path + with open(args['huc_id'],newline='') as list_file: + read_list = csv.reader(list_file) + huc_list=[i for row in read_list for i in row] + else: + print(TRED_BOLD + "Warning: " + WHITE_BOLD + "Invalid huc-id entry: " + CYAN_BOLD + args['fim_run_dir'] + WHITE_BOLD + " --> check that huc_id number or list file is valid") + exit_flag = True + print(huc_list) + if exit_flag: + print() + sys.exit() + + for huc_id in huc_list: + args['huc_id'] = huc_id + # Ensure fim_run_dir exists. + fim_run_dir = args['fim_run_dir'] + os.sep + huc_id + if not os.path.exists(fim_run_dir): + print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided fim_run_dir (-r) " + CYAN_BOLD + fim_run_dir + WHITE_BOLD + " could not be located in the 'outputs' directory." + ENDC) + print(WHITE_BOLD + "Please provide the parent directory name for fim_run.sh outputs. These outputs are usually written in a subdirectory, e.g. data/outputs/123456/123456." + ENDC) + print() + exit_flag = True + + # Ensure valid flow recurr intervals + default_flow_intervals = ['1_5','5_0','10_0'] + if args['magnitude'] == '': + args['magnitude'] = default_flow_intervals + print(TRED_BOLD + "Using default flow reccurence intervals: " + WHITE_BOLD + str(default_flow_intervals)[1:-1]) + else: + if set(default_flow_intervals).issuperset(set(args['magnitude'])) == False: + print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided magnitude (-y) " + CYAN_BOLD + args['magnitude'] + WHITE_BOLD + " is invalid. 
NWM Recurrence Interval options include: " + str(default_flow_intervals)[1:-1] + ENDC) + exit_flag = True + + if exit_flag: + print() + sys.exit() + + else: + run_recurr_test(fim_run_dir,args['branch_name'],huc_id,args['magnitude'],args['mask_type']) diff --git a/tools/preprocess_ahps_nws.py b/tools/preprocess_ahps_nws.py new file mode 100644 index 000000000..8c5a5f5ec --- /dev/null +++ b/tools/preprocess_ahps_nws.py @@ -0,0 +1,366 @@ +#!/usr/bin/env python3 +import numpy as np +from pathlib import Path +import pandas as pd +import geopandas as gpd +import rasterio +from tools_shared_functions import mainstem_nwm_segs, get_metadata, aggregate_wbd_hucs, get_thresholds, get_datum, ngvd_to_navd_ft, get_rating_curve, select_grids, get_nwm_segs, flow_data, process_extent, process_grid, raster_to_feature +import argparse +from dotenv import load_dotenv +import os +import traceback +import sys +sys.path.append('/foss_fim/src') + + +def get_env_paths(): + load_dotenv() + #import variables from .env file + API_BASE_URL = os.getenv("API_BASE_URL") + EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") + WBD_LAYER = os.getenv("WBD_LAYER") + return API_BASE_URL, EVALUATED_SITES_CSV, WBD_LAYER + +######################################################## +#Preprocess AHPS NWS +#This script will work on NWS AHPS fim data (some assumptions made about the data structure). +#Provide a source directory path (source_dir) where all NWS AHPS FIM data is located. NWS source data was previously downloaded and extracted. Some data is buried through several layers of subfolders in the source data. In general, the downloaded datasets were unzipped and starting from where the folder name was the AHPS code, this was copied and pasted into a new directory which is the source_dir. +#Provide a destination directory path (destination) which is where all outputs are located. +#Provide a reference raster path. +######################################################## +# source_dir = Path(r'path/to/nws/downloads') +# destination = Path(r'path/to/preprocessed/nws/data') +# reference_raster= Path(r'path/to/reference raster') + + +def preprocess_nws(source_dir, destination, reference_raster): + source_dir = Path(source_dir) + destination = Path(destination) + reference_raster = Path(reference_raster) + metadata_url = f'{API_BASE_URL}/metadata' + threshold_url = f'{API_BASE_URL}/nws_threshold' + rating_curve_url = f'{API_BASE_URL}/rating_curve' + log_file = destination / 'log.txt' + + #Write a run-time log file + destination.mkdir(parents = True, exist_ok = True) + log_file = destination / 'log.txt' + f = open(log_file, 'a+') + + #Define distance (in miles) to search for nwm segments + nwm_ds_search = 10 + nwm_us_search = 10 + #The NWS data was downloaded and unzipped. The ahps folder (with 5 digit code as folder name) was cut and pasted into a separate directory. So the ahps_codes iterates through that parent directory to get all of the AHPS codes that have data. + ahps_codes = [i.name for i in source_dir.glob('*') if i.is_dir() and len(i.name) == 5] + #Get mainstems NWM segments + #Workaround for sites in 02030103 and 02030104, many are not rfc_forecast_point = True + list_of_sites = pd.read_csv(EVALUATED_SITES_CSV)['Total_List'].to_list() + ms_segs = mainstem_nwm_segs(metadata_url, list_of_sites) + + #Find depth grid subfolder + for code in ahps_codes: + f.write(f'{code} : Processing\n') + print(f'processing {code}') + #'mnda2' is in Alaska outside of NWM domain. 
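# Illustrative sketch (not part of the original patch; directory names are hypothetical):
# ahps_codes above is simply the set of 5-character directory names under source_dir, so
# a layout like
#   source_dir/bmbp1/depth_grid/elev_123_4.tif
# yields the code 'bmbp1', and the grid name 'elev_123_4' is later parsed to an
# elevation of 123.4 ft (assumed NAVD88).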
+ if code in ['mnda2']: + f.write(f'{code} : skipping because outside of NWM domain\n') + continue + + #Get metadata of site and search for NWM segments x miles upstream/x miles downstream + select_by = 'nws_lid' + selector = [code] + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector, must_include = None, upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search) + metadata = metadata_list[0] + + #Assign huc to site using FIM huc layer. + dictionary, out_gdf = aggregate_wbd_hucs(metadata_list, Path(WBD_LAYER), retain_attributes = False) + [huc] = list(dictionary.keys()) + + #Get thresholds for action, minor, moderate, major. If no threshold data present, exit. + #The threshold flows source will dictate what rating curve (and datum) to use as it uses a decision tree (USGS priority then NRLDB) + #In multiple instances a USGS ID is given but then no USGS rating curve or in some cases no USGS datum is supplied. + select_by = 'nws_lid' + selector = code + stages, flows =get_thresholds(threshold_url, select_by, selector, threshold = 'all') + + #Make sure at least one valid threshold is supplied from WRDS. + threshold_categories = ['action','minor','moderate','major'] + if not any([stages[threshold] for threshold in threshold_categories]): + f.write(f'{code} : skipping because no threshold stages available\n') + continue + + #determine source of interpolated threshold flows, this will be the rating curve that will be used. + rating_curve_source = flows.get('source') + if rating_curve_source is None: + f.write(f'{code} : skipping because no rating curve source\n') + continue + + #Workaround for "bmbp1" where the only valid datum is from NRLDB (USGS datum is null). Modifying rating curve source will influence the rating curve and datum retrieved for benchmark determinations. + if code == 'bmbp1': + rating_curve_source = 'NRLDB' + + #Get the datum and adjust to NAVD if necessary. + nws, usgs = get_datum(metadata) + datum_data = {} + if rating_curve_source == 'USGS Rating Depot': + datum_data = usgs + elif rating_curve_source == 'NRLDB': + datum_data = nws + + #If datum not supplied, skip to new site + datum = datum_data.get('datum', None) + if datum is None: + f.write(f'{code} : skipping because site is missing datum\n') + continue + + #Custom workaround these sites have faulty crs from WRDS. CRS needed for NGVD29 conversion to NAVD88 + # USGS info indicates NAD83 for site: bgwn7, fatw3, mnvn4, nhpp1, pinn4, rgln4, rssk1, sign4, smfn7, stkn4, wlln7 + # Assumed to be NAD83 (no info from USGS or NWS data): dlrt2, eagi1, eppt2, jffw3, ldot2, rgdt2 + if code in ['bgwn7', 'dlrt2','eagi1','eppt2','fatw3','jffw3','ldot2','mnvn4','nhpp1','pinn4','rgdt2','rgln4','rssk1','sign4','smfn7','stkn4','wlln7' ]: + datum_data.update(crs = 'NAD83') + + #Workaround for bmbp1; CRS supplied by NRLDB is mis-assigned (NAD29) and is actually NAD27. This was verified by converting USGS coordinates (in NAD83) for bmbp1 to NAD27 and it matches NRLDB coordinates. + if code == 'bmbp1': + datum_data.update(crs = 'NAD27') + + #Custom workaround these sites have poorly defined vcs from WRDS. VCS needed to ensure datum reported in NAVD88. If NGVD29 it is converted to NAVD88. + #bgwn7, eagi1 vertical datum unknown, assume navd88 + #fatw3 USGS data indicates vcs is NAVD88 (USGS and NWS info agree on datum value). + #wlln7 USGS data indicates vcs is NGVD29 (USGS and NWS info agree on datum value). 
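# Illustrative note (not part of the original patch; values are hypothetical): the
# NGVD29-to-NAVD88 adjustment just below is a simple offset, e.g. a datum of 610.00 ft
# NGVD29 with a conversion of -0.80 ft gives datum88 = round(610.00 + (-0.80), 2) = 609.20.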
+ if code in ['bgwn7','eagi1','fatw3']: + datum_data.update(vcs = 'NAVD88') + elif code == 'wlln7': + datum_data.update(vcs = 'NGVD29') + + #Adjust datum to NAVD88 if needed + if datum_data.get('vcs') in ['NGVD29', 'NGVD 1929', 'NGVD,1929']: + #Get the datum adjustment to convert NGVD to NAVD. Sites not in contiguous US are previously removed otherwise the region needs changed. + datum_adj_ft = ngvd_to_navd_ft(datum_info = datum_data, region = 'contiguous') + datum88 = round(datum + datum_adj_ft, 2) + else: + datum88 = datum + + #get entire rating curve, same source as interpolated threshold flows (USGS Rating Depot first then NRLDB rating curve). + if rating_curve_source == 'NRLDB': + site = [code] + elif rating_curve_source == 'USGS Rating Depot': + site = [metadata.get('identifiers').get('usgs_site_code')] + + rating_curve = get_rating_curve(rating_curve_url, site) + + #Add elevation fields to rating curve + #Add field with vertical coordinate system + vcs = datum_data['vcs'] + if not vcs: + vcs = 'Unspecified, Assumed NAVD88' + rating_curve['vcs'] = vcs + + #Add field with original datum + rating_curve['datum'] = datum + + #If VCS is NGVD29 add rating curve elevation (in NGVD) as well as the NAVD88 datum + if vcs in ['NGVD29', 'NGVD 1929']: + #Add field with raw elevation conversion (datum + stage) + rating_curve['elevation_ngvd29'] = rating_curve['stage'] + datum + #Add field with adjusted NAVD88 datum + rating_curve['datum_navd88'] = datum88 + + #Add field with NAVD88 elevation + rating_curve['elevation_navd88'] = rating_curve['stage'] + datum88 + + + #Search through ahps directory find depth grid folder + parent_path = source_dir / code + + #Work around for bgwn7 and smit2 where grids were custom created from polygons (bgwn7-no grids, smit2 - no projection and applying projection from polygons had errors) + if code in ['bgwn7', 'smit2']: + [grids_dir] = [directory for directory in parent_path.glob('*custom*') if directory.is_dir()] + else: + #Find the directory containing depth grids. Assumes only one directory will be returned. + [grids_dir] = [directory for directory in parent_path.glob('*depth_grid*') if directory.is_dir()] + + #Get grids (all NWS ESRI grids were converted to Geotiff) + grid_paths = [grids for grids in grids_dir.glob('*.tif*') if grids.suffix in ['.tif', '.tiff']] + grid_names = [name.stem for name in grid_paths] + #If grids are present, interpolate a flow for the grid. + if grid_paths: + #Construct Dataframe containing grid paths, names, datum, code + df = pd.DataFrame({'code': code, 'path':grid_paths, 'name': grid_names, 'datum88': datum88}) + #Determine elevation from the grid name. All elevations are assumed to be in NAVD88 based on random inspection of AHPS inundation website layers. + df['elevation'] = df['name'].str.replace('elev_', '', case = False).str.replace('_','.').astype(float) + # Add a stage column using the datum (in NAVD88). Stage is rounded to the nearest 0.1 ft. + df['stage'] = round(df['elevation'] - df['datum88'],1) + #Sort stage in ascending order + df.sort_values(by = 'elevation', ascending = True, inplace = True) + #Interpolate flow from the rating curve using the elevation_navd88 values, if value is above or below the rating curve assign nan. 
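# Illustrative note (not part of the original patch): with left/right set to np.nan,
# elevations falling outside the rating curve receive NaN flows, e.g.
#   np.interp(2.5, [1.0, 2.0, 3.0], [10.0, 20.0, 30.0], left=np.nan, right=np.nan) -> 25.0
#   np.interp(5.0, [1.0, 2.0, 3.0], [10.0, 20.0, 30.0], left=np.nan, right=np.nan) -> nan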
+            df['flow'] = np.interp(df['elevation'], rating_curve['elevation_navd88'], rating_curve['flow'], left = np.nan, right = np.nan)
+            #Assign flow source to reflect interpolation from rc
+            df['flow_source'] = f'interpolated from {rating_curve_source} rating curve'
+
+        else:
+            f.write(f'{code} : Site has no benchmark grids\n')
+
+        #Select the appropriate threshold grid for evaluation. Using the supplied threshold stages and the calculated map stages.
+        grids,grid_flows = select_grids(df, stages, datum88, 1.1)
+
+        #Workaround for bigi1 and eagi1 which have grid names based on flows (not elevations)
+        if code in ['eagi1', 'bigi1']:
+            #Elevation is really flows (due to file names), assign this to stage
+            df['flow'] = df['elevation']
+            df['stage'] = df['elevation']
+            #Select grids using flows
+            grids, grid_flows = select_grids(df, flows, datum88, 500)
+            f.write(f'{code} : Site workaround, grid names based on flows not elevation\n')
+
+        #Obtain NWM segments that are on ms to apply flows
+        segments = get_nwm_segs(metadata)
+        site_ms_segs = set(segments).intersection(ms_segs)
+        segments = list(site_ms_segs)
+
+        #Write out boolean benchmark raster and flow file
+        try:
+            #for each threshold
+            for i in ['action', 'minor', 'moderate', 'major']:
+                #Obtain the flow and grid associated with threshold.
+                flow = grid_flows[i]
+                grid = grids[i]
+                extent = grids['extent']
+                #Make sure that flow and flow grid are valid
+                if not grid in ['No Map', 'No Threshold', 'No Flow']:
+                    #define output directory (to be created later)
+                    outputdir = destination / huc / code / i
+
+                    #Create Binary Grids, first create domain of analysis, then create binary grid
+
+                    #Domain extent is largest floodmap in the static library WITH holes filled
+                    filled_domain_raster = outputdir.parent / f'{code}_filled_orig_domain.tif'
+
+                    #Open benchmark data as a rasterio object.
+                    benchmark = rasterio.open(grid)
+                    benchmark_profile = benchmark.profile
+
+                    #Open extent data as rasterio object
+                    domain = rasterio.open(extent)
+                    domain_profile = domain.profile
+
+                    #if grid doesn't have CRS, then assign CRS using a polygon from the ahps inundation library
+                    if not benchmark.crs:
+                        #Obtain crs of the first polygon inundation layer associated with ahps code. Assumes only one polygon* subdirectory and assumes the polygon directory has at least 1 inundation shapefile.
+                        [ahps_polygons_directory] = [directory for directory in parent_path.glob('*polygon*') if directory.is_dir()]
+                        shapefile_path = list(ahps_polygons_directory.glob('*.shp'))[0]
+                        shapefile = gpd.read_file(shapefile_path)
+                        #Update benchmark and domain profiles with crs from shapefile. Assumed that benchmark/extent have same crs.
+                        benchmark_profile.update(crs = shapefile.crs)
+                        domain_profile.update(crs = shapefile.crs)
+
+                    #Create a domain raster if it does not exist.
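Editor's note: the code that follows fills the domain raster, converts the benchmark grid to a boolean raster with `process_grid`, and writes the result with rasterio. Below is a minimal sketch of that final write step only, using a small in-memory array; the CRS, transform, and nodata values here are assumptions for illustration (the real profile comes back from `process_grid`), and the int8/nodata=2 convention simply mirrors what these scripts use.

```python
import numpy as np
import rasterio
from rasterio.transform import from_origin

# Toy boolean benchmark: 1 = inundated, 0 = dry
boolean_benchmark = np.array([[1, 1, 0], [0, 1, 0], [0, 0, 0]], dtype='int8')

# Assumed profile values for illustration only
profile = {
    'driver': 'GTiff',
    'height': boolean_benchmark.shape[0],
    'width': boolean_benchmark.shape[1],
    'count': 1,
    'dtype': 'int8',
    'crs': 'EPSG:5070',
    'transform': from_origin(1000000.0, 2000000.0, 10.0, 10.0),
    'nodata': 2,  # arbitrary value outside {0, 1} so the int8 dtype can be kept
}

with rasterio.Env():
    with rasterio.open('example_extent.tif', 'w', **profile) as dst:
        dst.write(boolean_benchmark, 1)
```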
+ if not filled_domain_raster.exists(): + #Domain should have donut holes removed + process_extent(domain, domain_profile, output_raster = filled_domain_raster) + + + #Open domain raster as rasterio object + filled_domain = rasterio.open(filled_domain_raster) + filled_domain_profile = filled_domain.profile + + #Create the binary benchmark raster + boolean_benchmark, boolean_profile = process_grid(benchmark, benchmark_profile, filled_domain, filled_domain_profile, reference_raster) + + #Output binary benchmark grid and flow file to destination + outputdir.mkdir(parents = True, exist_ok = True) + output_raster = outputdir / (f'ahps_{code}_huc_{huc}_extent_{i}.tif') + + with rasterio.Env(): + with rasterio.open(output_raster, 'w', **boolean_profile) as dst: + dst.write(boolean_benchmark,1) + + #Close datasets + domain.close() + filled_domain.close() + benchmark.close() + + #Create the guts of the flow file. + flow_info = flow_data(segments,flow) + #Write out the flow file to csv + output_flow_file = outputdir / (f'ahps_{code}_huc_{huc}_flows_{i}.csv') + flow_info.to_csv(output_flow_file, index = False) + + except Exception as e: + f.write(f'{code} : Error preprocessing benchmark\n{repr(e)}\n') + f.write(traceback.format_exc()) + f.write('\n') + print(traceback.format_exc()) + #Wrapup for ahps sites that were processed. + ahps_directory = destination / huc / code + if ahps_directory.exists(): + #Delete original filled domain raster (it is an intermediate file to create benchmark data) + orig_domain_grid = ahps_directory / f'{code}_filled_orig_domain.tif' + orig_domain_grid.unlink() + #Create domain shapefile from any benchmark grid for site (each benchmark has domain footprint, value = 0). + filled_extent = list(ahps_directory.rglob('*_extent_*.tif'))[0] + domain_gpd = raster_to_feature(grid = filled_extent, profile_override = False, footprint_only = True) + domain_gpd['nws_lid'] = code + domain_gpd.to_file(ahps_directory / f'{code}_domain.shp') + #Populate attribute information for site + grids_attributes = pd.DataFrame(data=grids.items(), columns = ['magnitude','path']) + flows_attributes = pd.DataFrame(data=grid_flows.items(), columns=['magnitude','grid_flow_cfs']) + threshold_attributes = pd.DataFrame(data=stages.items(), columns = ['magnitude','magnitude_stage']) + #merge dataframes + attributes = grids_attributes.merge(flows_attributes, on = 'magnitude') + attributes = attributes.merge(threshold_attributes, on = 'magnitude') + attributes = attributes.merge(df[['path','stage','elevation', 'flow_source']], on = 'path') + #Strip out sensitive paths and convert magnitude stage to elevation + attributes['path'] = attributes['path'].apply(lambda x :Path(x).name) + attributes['magnitude_elev_navd88']=(datum88 + attributes['magnitude_stage']).astype(float).round(1) + #Add general site information + attributes['nws_lid'] = code + attributes['wfo'] = metadata['nws_data']['wfo'] + attributes['rfc'] = metadata['nws_data']['rfc'] + attributes['state'] = metadata['nws_data']['state'] + attributes['huc'] = huc + #Rename and Reorder columns + attributes.rename(columns = {'path':'grid_name', 'flow_source':'grid_flow_source','stage':'grid_stage','elevation':'grid_elev_navd88'}, inplace = True) + attributes = attributes[['nws_lid','wfo','rfc','state','huc','magnitude','magnitude_stage','magnitude_elev_navd88','grid_name','grid_stage','grid_elev_navd88', 'grid_flow_cfs','grid_flow_source']] + #Save attributes to csv + attributes.to_csv(ahps_directory / f'{code}_attributes.csv', index = False) + + #Write the 
rating curve to a file + rating_curve_output = ahps_directory / (f'{code}_rating_curve.csv') + rating_curve['lat'] = datum_data['lat'] + rating_curve['lon'] = datum_data['lon'] + rating_curve.to_csv(rating_curve_output, index = False) + + #Write the interpolated flows to file + df_output = ahps_directory / (f'{code}_interpolated_flows.csv') + df.to_csv(df_output, index = False) + + else: + f.write(f'{code} : Unable to evaluate site, missing all flows\n') + + #Close log file. + f.close() + + #Combine all attribute files + attribute_files = list(destination.rglob('*_attributes.csv')) + all_attributes = pd.DataFrame() + for i in attribute_files: + attribute_df = pd.read_csv(i, dtype={'huc':str}) + all_attributes = all_attributes.append(attribute_df) + + if not all_attributes.empty: + all_attributes.to_csv(destination / 'attributes.csv', index = False) + return + + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Create preprocessed USGS benchmark datasets at AHPS locations.') + parser.add_argument('-s', '--source_dir', help = 'Workspace where all source data is located.', required = True) + parser.add_argument('-d', '--destination', help = 'Directory where outputs are to be stored', required = True) + parser.add_argument('-r', '--reference_raster', help = 'reference raster used for benchmark raster creation', required = True) + args = vars(parser.parse_args()) + + + #Run get_env_paths and static_flow_lids + API_BASE_URL, EVALUATED_SITES_CSV, WBD_LAYER = get_env_paths() + preprocess_nws(**args) \ No newline at end of file diff --git a/tools/preprocess_ahps_usgs.py b/tools/preprocess_ahps_usgs.py new file mode 100644 index 000000000..e34725b4d --- /dev/null +++ b/tools/preprocess_ahps_usgs.py @@ -0,0 +1,469 @@ +#!/usr/bin/env python3 +import numpy as np +from pathlib import Path +import pandas as pd +import rasterio +import requests +from tools_shared_functions import mainstem_nwm_segs, get_metadata, aggregate_wbd_hucs, get_thresholds, get_datum, ngvd_to_navd_ft, get_rating_curve, select_grids, get_nwm_segs, flow_data, process_extent, process_grid, raster_to_feature +import argparse +from dotenv import load_dotenv +import os +import sys +sys.path.append('/foss_fim/src') +import traceback + + +def get_env_paths(): + load_dotenv() + #import variables from .env file + API_BASE_URL = os.getenv("API_BASE_URL") + EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") + WBD_LAYER = os.getenv("WBD_LAYER") + USGS_METADATA_URL = os.getenv("USGS_METADATA_URL") + return API_BASE_URL, EVALUATED_SITES_CSV, WBD_LAYER, USGS_METADATA_URL +############################################################################### +#Get USGS Site metadata +############################################################################### +def usgs_site_metadata(code): + ''' + Retrieves site metadata from USGS API and saves output as dictionary. Information used includes shortname and site number. + + Parameters + ---------- + code : STR + AHPS code. + USGS_METADATA_URL : STR + URL for USGS datasets. + + Returns + ------- + site_metadata : DICT + Output metadata for an AHPS site. 
+ ''' + # Make sure code is lower case + code = code.lower() + # Get site metadata from USGS API using ahps code + site_url = f'{USGS_METADATA_URL}/server/rest/services/FIMMapper/sites/MapServer/0/query?where=AHPS_ID+%3D+%27{code}%27&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentOnly=false&datumTransformation=¶meterValues=&rangeValues=&quantizationParameters=&f=pjson' + #Get data from API + response = requests.get(site_url) + #If response is valid, then get metadata and save to dictionary + if response.ok: + response_json = response.json() + site_metadata = response_json['features'][0]['attributes'] + return site_metadata + +############################################################################### +#Get USGS grid metadata +############################################################################### +def usgs_grid_metadata(code, has_grid_override = False): + ''' + Given an ahps code, retrieve the site metadata (using usgs_site_metadata) and then use that information to obtain metadata about available grids. Information includes elevation, stage, and flow for each grid. + + Parameters + ---------- + code : STR + AHPS code. + + Returns + ------- + appended_dictionary : DICT + Dictionary of metadata for each available inundation grid including grid id, flows, elevations, grid name for each inundation grid. + ''' + #Make sure code is in lower case + code = code.lower() + # Get site_metadata + site_metadata = usgs_site_metadata(code) + #From site metadata get the SHORT_NAME, SITE_NO, and 'MULTI_SITE', 'HAS_GRIDS' key values + short_name = site_metadata['SHORT_NAME'] + site_no = site_metadata['SITE_NO'] + has_grids = site_metadata['HAS_GRIDS'] + #There is at least one site (kilo1) that doesn't have grids but polygons are available which have been converted grids. 
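Editor's note: `usgs_site_metadata` above issues a very long hand-built query string against the FIMMapper `sites` MapServer layer. Functionally the same request can be expressed with a `requests` parameter dictionary, which keeps the URL encoding in one place and makes the query easier to read. A sketch under the assumption that only the parameters the script actually relies on (`where`, `outFields`, `returnGeometry`, `f`) need non-empty values:

```python
import requests

def usgs_site_metadata_params(code, usgs_metadata_url):
    """Same sites query as above, built from a params dict instead of a literal URL."""
    layer_url = f'{usgs_metadata_url}/server/rest/services/FIMMapper/sites/MapServer/0/query'
    params = {
        'where': f"AHPS_ID = '{code.lower()}'",
        'outFields': '*',
        'returnGeometry': 'false',
        'f': 'pjson',
    }
    response = requests.get(layer_url, params=params)
    response.raise_for_status()
    features = response.json().get('features', [])
    return features[0]['attributes'] if features else None
```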
+ if has_grid_override: + has_grids = 1 + multi_site = site_metadata['MULTI_SITE'] + #Grid metadata located at one of three URLs + if multi_site == 0 and has_grids == 1: + grids_url = f'{USGS_METADATA_URL}/server/rest/services/FIMMapper/floodExtents/MapServer/0/query?where=USGSID+%3D+%27{site_no}%27&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentOnly=false&datumTransformation=¶meterValues=&rangeValues=&quantizationParameters=&f=pjson' + elif multi_site > 0 and multi_site < 3 and has_grids == 1: + grids_url = f'{USGS_METADATA_URL}/server/rest/services/FIMMapper/floodExtentsMulti/MapServer/0/query?where=USGSID_1+%3D+%27{site_no}%27+OR+USGSID_2+%3D+%27{site_no}%27&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentOnly=false&datumTransformation=¶meterValues=&rangeValues=&quantizationParameters=&f=pjson' + elif multi_site == 3 and has_grids == 1: + grids_url = f'{USGS_METADATA_URL}/server/rest/services/FIMMapper/floodExtentsThreeSites/MapServer/0/query?where=USGSID_1+%3D+%27{site_no}%27+OR+USGSID_2+%3D+%27{site_no}%27+OR+USGSID_3+%3D+%27{site_no}%27&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentOnly=false&datumTransformation=¶meterValues=&rangeValues=&quantizationParameters=&f=pjson' + #Only get metadata on grids if site has grids available + if has_grids == 1: + #Get data from API + response = requests.get(grids_url) + #If response is valid then combine metadata on all grids into a single dictionary and write out to DataFrame. + if response.ok: + response_json =response.json() + metadata = response_json['features'] + appended_dictionary = {} + for i in metadata: + dictionary = i['attributes'] + gridname = short_name + '_' + str(dictionary['GRIDID']).zfill(4) + appended_dictionary[gridname] = dictionary + else: + appended_dictionary = {} + return appended_dictionary + + +######################################################## +#Preprocess USGS FIM +#This script will work on USGS FIM datasets. +#Provide source directory path (source_dir) where all USGS FIM data is located. This data was previously downloaded from USGS urls. +#Provide a destination directory path (destination) where all outputs are located. +#Provide a reference raster path. 
+######################################################## +#source_dir = Path(r'path/to/usgs/downloads') +#destination = Path(r'path/to/preprocessed/usgs/data') +#reference_raster= Path(r'path/to/reference raster') +def preprocess_usgs(source_dir, destination, reference_raster): + ''' + Preprocess USGS AHPS datasets. + + Parameters + ---------- + source_dir : str + Path to USGS Benchmark Datasets (AHPS) + destination : str + Path to output directory of preprocessed datasets. + reference_raster : str + Path to reference raster for benchmark binary raster creation. + + Returns + ------- + None. + + ''' + + source_dir = Path(source_dir) + destination = Path(destination) + reference_raster = Path(reference_raster) + metadata_url = f'{API_BASE_URL}/metadata' + threshold_url = f'{API_BASE_URL}/nws_threshold' + rating_curve_url = f'{API_BASE_URL}/rating_curve' + + #Write log file + destination.mkdir(parents=True, exist_ok = True) + log_file = destination / 'log.txt' + f = open(log_file, 'a+') + + #Define distance (in miles) to search for nwm segments + nwm_ds_search = 10 + nwm_us_search = 10 + #Need a list of AHPS codes. See "ahps_dictionaries" for method of getting this list. + ahps_codes = [folder.name for folder in source_dir.glob('*') if len(folder.name) == 5] + + #Get mainstems NWM segments + #Workaround for sites in 02030103 and 02030104, many are not rfc_forecast_point = True + #Import list of evaluated sites + list_of_sites = pd.read_csv(EVALUATED_SITES_CSV)['Total_List'].to_list() + ms_segs = mainstem_nwm_segs(metadata_url, list_of_sites) + + for code in ahps_codes: + f.write(f'{code} : Processing\n') + print(f'processing {code}') + #For a given code, find all inundation grids under that code. + code = code.lower() + + #Get metadata of site and search for NWM segments x miles upstream/x miles downstream + select_by = 'nws_lid' + selector = [code] + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector, must_include = None, upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search) + metadata = metadata_list[0] + + #Assign huc to site using FIM huc layer. + dictionary, out_gdf = aggregate_wbd_hucs(metadata_list, Path(WBD_LAYER), retain_attributes = False) + [huc] = list(dictionary.keys()) + + #There are 12 sites with special issues such as these don't have any crs coordinates and grid/polygon data don't align or missing grid data but polygons are available. + #Sites with no grid data but polygon data --> cfmm8, kilo1 + #Sites with no projection assigned to grid and polygon/grid don't align --> stak1, nmso1, nori3, sasi3 + #Sites with reprojection issues using rasterio (manually reprojected with ESRI) --> kcdm7, knym7, mcri2, ptvn6, tmai4 + #Sites with incomplete grids (used polys to convert to grids) --> 'roun6' + ahps_dir = source_dir / code / 'depth_grids' + if code in ['cfmm8','kilo1','stak1', 'sasi3', 'nori3', 'nmso1', 'kcdm7', 'knym7', 'mcri2','ptvn6','tmai4', 'roun6']: + f.write(f'{code} : Custom workaround related to benchmark data (mismatch crs, no grid data, etc)\n') + ahps_dir = source_dir / code / 'custom' + + #Get thresholds (action/minor/moderate/major flows and stages), if not available exit. + #For USGS many sites may not have rating curves but the threshold stages are available. + + select_by = 'nws_lid' + selector = code + stages, flows =get_thresholds(threshold_url, select_by, selector, threshold = 'all') + + #Make sure at least one valid threshold is supplied from WRDS. 
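Editor's note: earlier in this function the candidate AHPS codes are read straight from the directory names under `source_dir` (any five-character name). A small hedged variant of that listing step, which additionally filters to directories so stray five-character file names are not picked up; this is a suggestion, not the script's behavior:

```python
from pathlib import Path

def list_ahps_codes(source_dir):
    """Collect candidate AHPS site folders: five-character directory names under source_dir."""
    source_dir = Path(source_dir)
    # Restricting to directories guards against stray five-character file names (e.g. 'a.csv')
    return [p.name.lower() for p in source_dir.glob('*') if p.is_dir() and len(p.name) == 5]
```

The threshold check that follows mirrors the equivalent step in the NWS preprocessing script above.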
+ threshold_categories = ['action','minor','moderate','major'] + if not any([stages[threshold] for threshold in threshold_categories]): + f.write(f'{code} : Skipping because no threshold stages available\n') + continue + + #We need to adjust stages to elevations using the datum adjustment. This next section finds the datum adjustment. + #determine primary source for interpolated threshold flows (USGS first then NRLDB). This will dictate what rating curve to pull. + rating_curve_source = flows['source'] + #Workaround for sites that don't have rating curve but do have flows specified (USGS only). Assign rating_curve_source to 'USGS Rating Depot' manually inspected all of these sites and USGS datum is available and will be used. + if code in ['bdxt1','ccti3', 'fnnm7', 'mtao1', 'nfsi3', 'omot1' , 'sbrn1', 'vron4', 'watv1']: + rating_curve_source = 'USGS Rating Depot' + + #Get the datum and adjust to NAVD if necessary. + nws, usgs = get_datum(metadata) + datum_data = {} + if rating_curve_source == 'USGS Rating Depot': + datum_data = usgs + elif rating_curve_source == 'NRLDB': + datum_data = nws + + #If datum not supplied, skip to new site + datum = datum_data.get('datum', None) + if datum is None: + f.write(f'{code}: Skipping because of missing datum\n') + continue + + #Custom workaround, some sites have poorly defined crs. CRS requuired for ngvd to navd conversions + # Assumed NAVD88 (no info from USGS or NWS metadata): kynm7, ksvm7, yesg1 + # Assigned NAVD88 because USGS metadata indicates NAD83: arnm7, grfi2, kctm7, nast1, nhri3, roun6, vllm7 + # Assigned NAVD88 (reported NAVD 1988): cmtl1 + if code in ['arnm7', 'cmtl1','grfi2','kctm7','knym7','ksvm7','nast1','nhri3','roun6','vllm7','yesg1']: + #Update crs to NAD83 (some are assumed, others have USGS info indicating NAD83 crs) + datum_data.update(crs = 'NAD83') + + #Adjust datum to NAVD88 if needed (Assumes that if vcs not NGVD29 or NGVD 1929 it is in NAVD88) + if datum_data.get('vcs') in ['NGVD29', 'NGVD 1929']: + #Get the datum adjustment to convert NGVD to NAVD. Sites not in contiguous US are previously removed otherwise the region needs changed. + datum_adj_ft = ngvd_to_navd_ft(datum_info = datum_data, region = 'contiguous') + datum88 = round(datum + datum_adj_ft, 2) + else: + datum88 = datum + + + #Set Grid override flag, if set to True then the 'has_grids' property is ignored. Allows for custom workaround. + #Special exception for kilo1, where it has attribute (has_grids == 0) yet there is grid metadata and polygons were converted to grids. + if code == 'kilo1': + grid_override = True + f.write(f'{code} : Custom workaround related to "has_grids" attribute') + else: + grid_override = False + #get grid metadata (metadata includes, elevation/stage/flow and etc for each site). If empty exit. + grid_metadata = usgs_grid_metadata(code, has_grid_override=grid_override) + if not grid_metadata: + f.write(f'{code} : Skipping because no grid metadata available\n') + continue + + #Get paths of all grids that have been downloaded, if no grids available for site then exit. + grid_paths = [grids for grids in ahps_dir.glob('*.tif*') if grids.suffix in ['.tif', '.tiff']] + if not grid_paths: + f.write(f'{code} : Skipping because no benchmark grids available\n') + continue + + # Iterate through grid_metadata and add the path to the dictionary as well as an indicator of whether the path exists. + for key in grid_metadata: + #When USGS grid data was downloaded, grid was saved with the 'key' name. Update the grid_metadata to include the path. 
+ path = ahps_dir / (key + '.tif') + grid_metadata[key]['path'] = path + #Verify that the path exists (in some instances the grid should be available but it isn't) and add as subkey + if path.is_file(): + grid_metadata[key]['path_exist'] = True + else: + grid_metadata[key]['path_exist'] = False + + #Convert grid metadata information to a DataFrame + df = pd.DataFrame.from_dict(grid_metadata, orient = 'index') + #Filter out rows where grids do not exist + df = df.query('path_exist == True') + #Prior to renaming columns do a check to make sure single site (will add functionality for multi-sites later) + if not 'QCFS' in df.columns: + f.write(f'{code} : Skipping because multisite\n') + continue + #Rename columns to match NWS AHPS data structure, this only applies to single USGS sites, if a multisite the columns are different from QCFS. + df.rename(columns = {'QCFS':'flow', 'STAGE':'stage', 'ELEV':'elevation'}, inplace=True) + #Many USGS maps have elevations to numerous decimal places. Round to nearest tenth. + #NWS has maps to nearest tenth, for example HARP1 is both USGS and NWS, the USGS maps are to the hundredth of foot and NWS are to tenth. + df['elevation'] = round(df['elevation'],1) + #Assume flow source is supplied, if it is interpolated, this column will be changed later on. + df['flow_source'] = 'supplied by USGS' + #Accomodate for vdsg1 (upon inspection WRDS API reports thresholds in elevation instead of stage for this site) + if code == 'vdsg1': + df['stage'] = df['elevation'] + f.write(f'{code} : Custom workaround because thresholds are reported as elevations\n') + + #Define rating curve as empty dataframe, populate if needed. + rating_curve = pd.DataFrame() + #If flows are missing from the grid metadata, then interpolate flows using NWS or USGS rating curve + if df['flow'].isnull().all(): + #get entire rating curve, same source as interpolated threshold flows (USGS Rating Depot first then NRLDB rating curve). + if rating_curve_source == 'NRLDB': + site = [code] + elif rating_curve_source == 'USGS Rating Depot': + site = [metadata.get('identifiers').get('usgs_site_code')] + + rating_curve = get_rating_curve(rating_curve_url, site) + + #If rating curve is not present, skip site + if rating_curve.empty: + f.write(f'{code} : Skipping because no rating curve\n') + continue + #Add elevation fields to rating curve + #Add field with vertical coordinate system + vcs = datum_data['vcs'] + if not vcs: + vcs = 'Unspecified, Assumed NAVD88' + rating_curve['vcs'] = vcs + + #Add field with original datum + rating_curve['datum'] = datum + + #If VCS is NGVD29 add rating curve elevation (in NGVD) as well as the NAVD88 datum + if vcs in ['NGVD29', 'NGVD 1929']: + #Add field with raw elevation conversion (datum + stage) + rating_curve['elevation_ngvd29'] = rating_curve['stage'] + datum + #Add field with adjusted NAVD88 datum + rating_curve['datum_navd88'] = datum88 + #Add field with NAVD88 elevation + rating_curve['elevation_navd88'] = rating_curve['stage'] + datum88 + # sort inundation grids in ascending order based on stage + df.sort_values(by = 'elevation', ascending = True, inplace = True) + #interpolate based on stage (don't need elevation because we have stage of floodgrid) + df['flow'] = np.interp(df['elevation'], rating_curve['elevation_navd88'], rating_curve['flow'], left = np.nan, right = np.nan) + #Overwrite flow source to reflect interpolation from rc + df['flow_source'] = f'interpolated from {rating_curve_source} rating curve' + + #Select the appropriate threshold grid for evaluation. 
Using the supplied threshold stages and the calculated map stages. + grids,grid_flows = select_grids(df, stages, datum88, 1.1) + + #Obtain NWM segments that are on ms to apply flows + segments = get_nwm_segs(metadata) + site_ms_segs = set(segments).intersection(ms_segs) + segments = list(site_ms_segs) + #Preprocess grids and export to file and create flow file. + try: + #for each threshold + for i in threshold_categories: + #Obtain the flow and grid associated with threshold as well as extent grid which serves as the domain. + flow = grid_flows[i] + grid = grids[i] + extent = grids['extent'] + #Make sure that flow and flow grid are valid + if not grid in ['No Map', 'No Threshold', 'No Flow']: + #Define output directory (to be created later) + outputdir = destination / huc / code / i + + #Create Binary Grids, first create domain of analysis, then create binary grid + + #Domain extent is largest floodmap in the static library WITH holes filled + filled_domain_raster = outputdir.parent / f'{code}_filled_orig_domain.tif' + #Create a domain raster if it does not exist. + if not filled_domain_raster.exists(): + #Open extent data as rasterio object + domain = rasterio.open(extent) + domain_profile = domain.profile + #Domain should have donut holes removed + process_extent(domain, domain_profile, output_raster = filled_domain_raster) + + #Open domain raster as rasterio object + filled_domain = rasterio.open(filled_domain_raster) + filled_domain_profile = filled_domain.profile + + #Open benchmark data as a rasterio object. + benchmark = rasterio.open(grid) + benchmark_profile = benchmark.profile + + #Create the binary benchmark raster + boolean_benchmark, boolean_profile = process_grid(benchmark, benchmark_profile, filled_domain, filled_domain_profile, reference_raster) + + #Output binary benchmark grid and flow file to destination + outputdir.mkdir(parents = True, exist_ok = True) + output_raster = outputdir / (f'ahps_{code}_huc_{huc}_extent_{i}.tif') + with rasterio.Env(): + with rasterio.open(output_raster, 'w', **boolean_profile) as dst: + dst.write(boolean_benchmark,1) + + #Close datasets + domain.close() + filled_domain.close() + benchmark.close() + + #Create the guts of the flow file. + flow_info = flow_data(segments,flow) + #Write out the flow file to csv + output_flow_file = outputdir / (f'ahps_{code}_huc_{huc}_flows_{i}.csv') + flow_info.to_csv(output_flow_file, index = False) + + except Exception as e: + f.write(f'{code} : Error preprocessing benchmark\n{repr(e)}\n') + f.write(traceback.format_exc()) + f.write('\n') + print(traceback.format_exc()) + #Wrapup for ahps sites that were processed. + ahps_directory = destination / huc / code + if ahps_directory.exists(): + #Delete original filled domain raster (it is an intermediate file to create benchmark data) + orig_domain_grid = ahps_directory / f'{code}_filled_orig_domain.tif' + orig_domain_grid.unlink() + #Create domain shapefile from any benchmark grid for site (each benchmark has domain footprint, value = 0). 
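Editor's note: the wrap-up that follows assembles a per-site attributes table by turning the `grids`, `grid_flows`, and `stages` dictionaries into small DataFrames keyed on magnitude and merging them. A reduced sketch of that pattern with toy values; the column names mirror the script, the values are illustrative only:

```python
import pandas as pd

grids = {'action': 'elev_703_5.tif', 'minor': 'elev_705_0.tif'}
grid_flows = {'action': 820.0, 'minor': 1450.0}
stages = {'action': 10.5, 'minor': 12.0}

grids_attributes = pd.DataFrame(list(grids.items()), columns=['magnitude', 'path'])
flows_attributes = pd.DataFrame(list(grid_flows.items()), columns=['magnitude', 'grid_flow_cfs'])
threshold_attributes = pd.DataFrame(list(stages.items()), columns=['magnitude', 'magnitude_stage'])

attributes = (grids_attributes
              .merge(flows_attributes, on='magnitude')
              .merge(threshold_attributes, on='magnitude'))
print(attributes)
```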
+ filled_extent = list(ahps_directory.rglob('*_extent_*.tif'))[0] + domain_gpd = raster_to_feature(grid = filled_extent, profile_override = False, footprint_only = True) + domain_gpd['nws_lid'] = code + domain_gpd.to_file(ahps_directory / f'{code}_domain.shp') + #Populate attribute information for site + grids_attributes = pd.DataFrame(data=grids.items(), columns = ['magnitude','path']) + flows_attributes = pd.DataFrame(data=grid_flows.items(), columns=['magnitude','grid_flow_cfs']) + threshold_attributes = pd.DataFrame(data=stages.items(), columns = ['magnitude','magnitude_stage']) + #merge dataframes + attributes = grids_attributes.merge(flows_attributes, on = 'magnitude') + attributes = attributes.merge(threshold_attributes, on = 'magnitude') + attributes = attributes.merge(df[['path','stage','elevation', 'flow_source']], on = 'path') + #Strip out sensitive paths and convert magnitude stage to elevation + attributes['path'] = attributes['path'].apply(lambda x :Path(x).name) + attributes['magnitude_elev_navd88']=(datum88 + attributes['magnitude_stage']).astype(float).round(1) + #Add general site information + attributes['nws_lid'] = code + attributes['wfo'] = metadata['nws_data']['wfo'] + attributes['rfc'] = metadata['nws_data']['rfc'] + attributes['state'] = metadata['nws_data']['state'] + attributes['huc'] = huc + #Rename and Reorder columns + attributes.rename(columns = {'path':'grid_name', 'flow_source':'grid_flow_source','stage':'grid_stage','elevation':'grid_elev_navd88'}, inplace = True) + attributes = attributes[['nws_lid','wfo','rfc','state','huc','magnitude','magnitude_stage','magnitude_elev_navd88','grid_name','grid_stage','grid_elev_navd88', 'grid_flow_cfs','grid_flow_source']] + #Save attributes to csv + attributes.to_csv(ahps_directory / f'{code}_attributes.csv', index = False) + + #if rating_curve generated, write the rating curve to a file + if not rating_curve.empty: + rating_curve_output = ahps_directory / (f'{code}_rating_curve.csv') + rating_curve['lat'] = datum_data['lat'] + rating_curve['lon'] = datum_data['lon'] + rating_curve.to_csv(rating_curve_output, index = False) + f.write(f'{code} : Rating curve needed to interpolate flow\n') + + #Write the interpolated flows to file + df_output = ahps_directory / (f'{code}_flows.csv') + df.to_csv(df_output, index = False) + + else: + f.write(f'{code} : Unable to evaluate site, missing all flows\n') + + f.close() + + #Combine all attribute files + attribute_files = list(destination.rglob('*_attributes.csv')) + all_attributes = pd.DataFrame() + for i in attribute_files: + attribute_df = pd.read_csv(i, dtype={'huc':str}) + all_attributes = all_attributes.append(attribute_df) + if not all_attributes.empty: + all_attributes.to_csv(destination / 'attributes.csv', index = False) + + return + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Create preprocessed USGS benchmark datasets at AHPS locations.') + parser.add_argument('-s', '--source_dir', help = 'Workspace where all source data is located.', required = True) + parser.add_argument('-d', '--destination', help = 'Directory where outputs are to be stored', required = True) + parser.add_argument('-r', '--reference_raster', help = 'reference raster used for benchmark raster creation', required = True) + args = vars(parser.parse_args()) + + + #Run get_env_paths and static_flow_lids + API_BASE_URL, EVALUATED_SITES_CSV, WBD_LAYER, USGS_METADATA_URL = get_env_paths() + preprocess_usgs(**args) \ No newline at end of file diff --git 
a/tests/preprocess/preprocess_benchmark.py b/tools/preprocess_benchmark.py old mode 100644 new mode 100755 similarity index 72% rename from tests/preprocess/preprocess_benchmark.py rename to tools/preprocess_benchmark.py index 4d6d860b0..81a65db2d --- a/tests/preprocess/preprocess_benchmark.py +++ b/tools/preprocess_benchmark.py @@ -1,9 +1,4 @@ -# -*- coding: utf-8 -*- -""" -Created on Thu Jul 23 15:17:04 2020 - -@author: trevor.grout -""" +#!/usr/bin/env python3 import rasterio from rasterio.warp import calculate_default_transform, reproject, Resampling @@ -13,7 +8,7 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_path = None): ''' - This function will preprocess a benchmark dataset for purposes of evaluating FIM output. A benchmark dataset will be transformed using properties (CRS, resolution) from an input reference dataset. The benchmark raster will also be converted to a boolean (True/False) raster with inundated areas (True or 1) and dry areas (False or 0). + This function will preprocess a benchmark dataset for purposes of evaluating FIM output. A benchmark dataset will be transformed using properties (CRS, resolution) from an input reference dataset. The benchmark raster will also be converted to a boolean (True/False) raster with inundated areas (True or 1) and dry areas (False or 0). Parameters ---------- @@ -32,59 +27,59 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_p Raster profile information for the preprocessed benchmark array (required for writing to output dataset). ''' - #Open and read raster and benchmark rasters + # Open and read raster and benchmark rasters reference = rasterio.open(reference_raster) benchmark = rasterio.open(benchmark_raster) - benchmark_arr = benchmark.read(1) + benchmark_arr = benchmark.read(1) - #Set arbitrary no data value that is not possible value of the benchmark dataset. This will be reassigned later. + # Set arbitrary no data value that is not possible value of the benchmark dataset. This will be reassigned later nodata_value = -2147483648 - - #Determine the new transform and dimensions of reprojected/resampled raster. + + # Determine the new transform and dimensions of reprojected/resampled raster new_transform, new_width, new_height = calculate_default_transform(benchmark.crs, reference.crs, benchmark.width, benchmark.height, *benchmark.bounds, resolution = reference.res) - #Define an empty array that is same dimensions as output by the "calculate_default_transform" command. + # Define an empty array that is same dimensions as output by the "calculate_default_transform" command benchmark_projected = np.empty((new_height,new_width), dtype=np.int32) - #Reproject and resample the benchmark dataset. Bilinear resampling due to continuous depth data. - reproject(benchmark_arr, + # Reproject and resample the benchmark dataset. 
Bilinear resampling due to continuous depth data + reproject(benchmark_arr, destination = benchmark_projected, - src_transform = benchmark.transform, + src_transform = benchmark.transform, src_crs = benchmark.crs, src_nodata = benchmark.nodata, - dst_transform = new_transform, + dst_transform = new_transform, dst_crs = reference.crs, dst_nodata = nodata_value, dst_resolution = reference.res, resampling = Resampling.bilinear) - #Convert entire depth grid to boolean (1 = Flood, 0 = No Flood) + # Convert entire depth grid to boolean (1 = Flood, 0 = No Flood) boolean_benchmark = np.where(benchmark_projected != nodata_value, 1, 0) - #Update profile (data type, NODATA, transform, width/height). + #Update profile (data type, NODATA, transform, width/height) profile = reference.profile profile.update(transform = new_transform) profile.update(dtype = rasterio.int8) - profile.update(nodata = 2) #Update NODATA to some integer so we can keep int8 datatype. There are no NODATA in the raster dataset. + profile.update(nodata = 2) #Update NODATA to some integer so we can keep int8 datatype. There are no NODATA in the raster dataset profile.update (width = new_width) profile.update(height = new_height) - #Write out preprocessed benchmark array to raster if path is supplied - if out_raster_path is not None: - with rasterio.Env(): - #Write out reassigned values to raster dataset. + # Write out preprocessed benchmark array to raster if path is supplied + if out_raster_path is not None: + with rasterio.Env(): + # Write out reassigned values to raster dataset with rasterio.open(out_raster_path, 'w', **profile) as dst: - dst.write(boolean_benchmark.astype('int8'),1) + dst.write(boolean_benchmark.astype('int8'),1) return boolean_benchmark.astype('int8'), profile if __name__ == '__main__': - #Parse arguments + # Parse arguments parser = argparse.ArgumentParser(description = 'Preprocess BLE grids (in tiff format) for use in run_test_cast.py. Preprocessing includes reprojecting and converting to boolean raster (1 = Flooding, 0 = No Flooding)') parser.add_argument('-b','--benchmark-raster', help = 'BLE depth or water surface elevation grid (in GTiff format).', required = True) parser.add_argument('-r', '--reference-raster', help = 'Benchmark will use reference raster to set CRS and resolution to reference raster CRS.', required = True) parser.add_argument('-o', '--out-raster-path', help = 'Output raster path (include name and extension).', required = True) - #Extract to dictionary and assign to variables. + # Extract to dictionary and assign to variables args = vars(parser.parse_args()) - #Run preprocess benchmark function + # Run preprocess benchmark function preprocess_benchmark_static(**args) diff --git a/tools/preprocess_download_usgs_grids.py b/tools/preprocess_download_usgs_grids.py new file mode 100644 index 000000000..332a30ed1 --- /dev/null +++ b/tools/preprocess_download_usgs_grids.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +import urllib.request +from pathlib import Path +from dotenv import load_dotenv +import os +import argparse +import requests +from collections import defaultdict +import urllib +import pandas as pd + +load_dotenv() +USGS_DOWNLOAD_URL = os.getenv("USGS_DOWNLOAD_URL") +USGS_METADATA_URL = os.getenv("USGS_METADATA_URL") +EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") +############################################################################### +#Get all usgs grids available for download. 
This step is required because the grid metadata API returns gridID as an integer and truncates leading zeros found in grid names. +############################################################################### +def get_all_usgs_gridnames(): + ''' + Retrieve all the available grids for download from USGS. This is necessary as the grid metadata available from USGS API doesn't preserve leading zeros. + + Returns + ------- + grid_lookup : collections.defaultdict + Dictionary with shortname as the key and a list of gridnames associated with a given shortname as values. + ''' + + #Grid names are split between 4 websites + sites = ['grids_1', 'grids_2', 'grids_3', 'grids_4'] + #Append all grid names to this variable + grid_names = [] + #loop through each site and append the grid name to a list. + for i in sites: + #Get gridnames + url = f'{USGS_METADATA_URL}/server/rest/services/FIMMapper/{i}/MapServer?f=pjson' + response = requests.get(url) + site_json = response.json() + info = site_json['layers'] + #Loop through all grid info and extract the grid name. + for i in info: + grid_name = i['name'] + grid_names.append(grid_name) + #Create dictionary with key of shortname and values being list of grids available. + grid_lookup = defaultdict(list) + for i in grid_names: + #define key (shortname) and value (gridname) + key = i.split('_')[0] + value = i + grid_lookup[key].append(value) + return grid_lookup +############################################################################### +#Get USGS Site metadata +############################################################################### +def usgs_site_metadata(code): + ''' + Retrieves site metadata from USGS API and saves output as dictionary. Information used includes shortname and site number. + + Parameters + ---------- + code : STR + AHPS code. + USGS_METADATA_URL : STR + URL for USGS datasets. + + Returns + ------- + site_metadata : DICT + Output metadata for an AHPS site. + ''' + # Make sure code is lower case + code = code.lower() + # Get site metadata from USGS API using ahps code + site_url = f'{USGS_METADATA_URL}/server/rest/services/FIMMapper/sites/MapServer/0/query?where=AHPS_ID+%3D+%27{code}%27&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentOnly=false&datumTransformation=¶meterValues=&rangeValues=&quantizationParameters=&f=pjson' + #Get data from API + response = requests.get(site_url) + #If response is valid, then get metadata and save to dictionary + if response.ok: + response_json = response.json() + site_metadata = response_json['features'][0]['attributes'] + return site_metadata +######################################################################## +#Get USGS Benchmark Grids +######################################################################## +def obtain_usgs_data(workspace): + ''' + Download GRIDS from USGS FIM studies + + Parameters + ---------- + workspace : STR + Output directory where grids are placed. + + Returns + ------- + None. + + ''' + + + #Define workspace where output data is downloaded to + workspace = Path(workspace) + #Get all names of grids available for download from USGS website. 
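Editor's note: `get_all_usgs_gridnames` above walks the four FIMMapper grid services and groups the returned layer names by their shortname prefix. The grouping step on its own is a short `defaultdict` pattern; here is a sketch with hard-coded layer names standing in for the API response (the names are hypothetical examples):

```python
from collections import defaultdict

# Stand-in layer names as returned by the FIMMapper grid services
grid_names = ['harp1_0001', 'harp1_0002', 'kilo1_0001']

grid_lookup = defaultdict(list)
for name in grid_names:
    shortname = name.split('_')[0]
    grid_lookup[shortname].append(name)

print(dict(grid_lookup))  # {'harp1': ['harp1_0001', 'harp1_0002'], 'kilo1': ['kilo1_0001']}
```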
+ grid_lookup = get_all_usgs_gridnames() + #List of target ahps codes. In "ahps_dictionary.py" we defined a dictionary (ahps_lookup) that contains all ahps codes and their sources. + target_ahps_codes = pd.read_csv(EVALUATED_SITES_CSV) + target_ahps_codes = target_ahps_codes.query('Source in ["Both","USGS"]')['Total_List'].to_list() + #Loop through all codes in the target_ahps_codes list. + all_messages = [] + for code in target_ahps_codes: + #Get metadata information related to ahps code from USGS API. + code_metadata = usgs_site_metadata(code) + #From code_metadata get the shortname and site_no associated with the code. + shortname = code_metadata['SHORT_NAME'] + site_no = code_metadata['SITE_NO'] + #Define the output location for all grids and create if it doesn't exist. + dest_dir = workspace / code.lower() / 'depth_grids' + dest_dir.mkdir(parents = True, exist_ok = True) + #Get list of all available grids for download using the grid_lookup dictionary + gridnames = grid_lookup[shortname] + #Loop through all available grids for download, download them, and save to defined location. + for gridname in gridnames: + print(f'working on {gridname}') + gridid = gridname.split('_')[1] + #Define a filled gridID that has leading zeros out to 4 digits. + filled_gridid = gridid.zfill(4) + #Download gridded data from the USGS s3 website. The files will be copied specified directory and the GRIDID will have 4 digits with leading zeros. + base_url = f'{USGS_DOWNLOAD_URL}/FIM/tmp1/fimgrids2iwrss/' + #Each grid dataset has these file extensions. Download each file + extensions = ['.tif', '.tfw', '.tif.aux.xml', '.tif.ovr', '.tif.xml'] + #Loop through each extension type and download. + for gridext in extensions: + #Define the url associated with each grid + url = base_url + gridname + gridext + #Define the output file path of the grid. The grid filename uses the filled gridID. This resolves issues down the road of USGS grid metadata information storing the gridid as a number and truncating leading zeros from the gridname. + saved_grid_path = dest_dir / (f'{shortname}_{filled_gridid}{gridext}') + #Check to see if file has already been downloaded + if not saved_grid_path.is_file(): + #If file hasn't been downloaded, download it. If there was an error downloading, make note. + try: + urllib.request.urlretrieve(url, saved_grid_path) + message = f'{gridname} downloaded' + all_messages.append(message) + except: + message = f'{gridname} error downloading' + all_messages.append(message) + #If file exists make note of it. 
+ else: + message = f'skipping {gridname}, exists on file' + all_messages.append(message) + return + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Download Grid data associated with USGS FIM studies.') + parser.add_argument('-w', '--workspace', help = 'Workspace where all outputs will be saved.', required = True) + args = vars(parser.parse_args()) + + #Download datasets + obtain_usgs_data(**args) + + diff --git a/tests/preprocess/preprocess_fimx.py b/tools/preprocess_fimx.py old mode 100644 new mode 100755 similarity index 78% rename from tests/preprocess/preprocess_fimx.py rename to tools/preprocess_fimx.py index 344fecf7d..cad6058d0 --- a/tests/preprocess/preprocess_fimx.py +++ b/tools/preprocess_fimx.py @@ -1,9 +1,5 @@ -# -*- coding: utf-8 -*- -""" -Created on Fri Jul 24 13:50:59 2020 +#!/usr/bin/env python3 -@author: trevor.grout -""" import rasterio from rasterio.warp import calculate_default_transform, reproject, Resampling from rasterio import features @@ -47,74 +43,75 @@ def fimx_to_fim3(catchments_path, raster_value_field, hand_raster_path, template Preprocessed catchment raster profile. ''' - - - #Read in template raster as band object. + + + # Read in template raster as band object reference = rasterio.open(template_raster) - - #Step 1: Convert HAND grid - #Read in the hand raster + + ## Step 1: Convert HAND grid + # Read in the hand raster hand = rasterio.open(hand_raster_path) hand_arr = hand.read(1) - #Determine the new transform and dimensions of reprojected raster (CRS = reference raster). + #Determine the new transform and dimensions of reprojected raster (CRS = reference raster) new_transform, new_width, new_height = calculate_default_transform(hand.crs, reference.crs, hand.width, hand.height, *hand.bounds) - #Define an empty array that is same dimensions as output by the "calculate_default_transform" command. - hand_proj = np.empty((new_height,new_width), dtype=np.float) - #Reproject to target dataset (resample method is bilinear due to elevation type data). + # Define an empty array that is same dimensions as output by the "calculate_default_transform" command + hand_proj = np.empty((new_height,new_width), dtype=np.float) + # Reproject to target dataset (resample method is bilinear due to elevation type data) hand_nodata_value = -2147483648 - reproject(hand_arr, + reproject(hand_arr, destination = hand_proj, - src_transform = hand.transform, + src_transform = hand.transform, src_crs = hand.crs, src_nodata = hand.nodata, - dst_transform = new_transform, + dst_transform = new_transform, dst_crs = reference.crs, dst_nodata = hand_nodata_value, dst_resolution = hand.res, resampling = Resampling.bilinear) - #Update profile data type and no data value. + + # Update profile data type and no data value hand_profile = reference.profile hand_profile.update(dtype = rasterio.float32) hand_profile.update(nodata = hand_nodata_value) hand_profile.update(width = new_width) hand_profile.update(height = new_height) hand_profile.update(transform = new_transform) - - #Step 2: Catchments to Polygons (same extent as the HAND raster) - #Read in the catchment layer to geopandas dataframe and convert to same CRS as reference raster. 
+ + ## Step 2: Catchments to Polygons (same extent as the HAND raster) + # Read in the catchment layer to geopandas dataframe and convert to same CRS as reference raster gdbpath, layername = os.path.split(catchments_path) gdb_layer=gpd.read_file(gdbpath, driver='FileGDB', layer=layername) proj_gdb_layer = gdb_layer.to_crs(reference.crs) - #Prepare vector data to be written to raster. - shapes = list(zip(proj_gdb_layer['geometry'],proj_gdb_layer[raster_value_field].astype('int32'))) - #Write vector data to raster image. Fill raster with zeros for areas that do not have data. We will set nodata to be zero later. - catchment_proj = features.rasterize(((geometry, value) for geometry, value in shapes), fill = 0, out_shape=hand_proj.shape, transform=hand_profile['transform'], dtype = 'int32' ) - #Save raster image to in-memory dataset. Reset dtype and nodata values. + # Prepare vector data to be written to raster + shapes = list(zip(proj_gdb_layer['geometry'],proj_gdb_layer[raster_value_field].astype('int32'))) + # Write vector data to raster image. Fill raster with zeros for areas that do not have data. We will set nodata to be zero later + catchment_proj = features.rasterize(((geometry, value) for geometry, value in shapes), fill = 0, out_shape=hand_proj.shape, transform=hand_profile['transform'], dtype = 'int32' ) + # Save raster image to in-memory dataset. Reset dtype and nodata values. catchment_profile = hand_profile.copy() catchment_profile.update(dtype = 'int32') catchment_profile.update(nodata=0) - - #Step 3: Union of NODATA locations applied to both HAND and Catchment grids. + + ## Step 3: Union of NODATA locations applied to both HAND and Catchment grids catchment_masked = np.where(np.logical_or(hand_proj == hand_profile['nodata'], catchment_proj == catchment_profile['nodata']), catchment_profile['nodata'],catchment_proj) - #Assign NODATA to hand where both catchment and hand have NODATA else assign hand values. + # Assign NODATA to hand where both catchment and hand have NODATA else assign hand values. hand_masked = np.where(np.logical_or(hand_proj == hand_profile['nodata'], catchment_proj == catchment_profile['nodata']), hand_profile['nodata'],hand_proj) - #Step 4: Write out hand and catchment rasters to file if path is specified + ## Step 4: Write out hand and catchment rasters to file if path is specified if out_hand_path is not None: - os.makedirs(os.path.split(out_hand_path)[0], exist_ok = True) + os.makedirs(os.path.split(out_hand_path)[0], exist_ok = True) with rasterio.Env(): with rasterio.open(out_hand_path, 'w', **hand_profile) as hnd_dst: hnd_dst.write(hand_masked.astype('float32'),1) if out_catchment_path is not None: - os.makedirs(os.path.split(out_catchment_path)[0], exist_ok = True) + os.makedirs(os.path.split(out_catchment_path)[0], exist_ok = True) with rasterio.Env(): with rasterio.open(out_catchment_path, 'w', **catchment_profile) as cat_dst: - cat_dst.write(catchment_masked.astype('int32'),1) - + cat_dst.write(catchment_masked.astype('int32'),1) + return hand_masked, hand_profile, catchment_masked, catchment_profile if __name__ == '__main__': - #Parse arguments + # Parse arguments parser = argparse.ArgumentParser(description = 'Preprocess FIM 1 and FIM 2 HAND and Catchment grids to be compatible with FIM 3.') parser.add_argument('-c','--catchments-path', help = 'Path to catchments vector file', required = True) parser.add_argument('-f', '--raster-value-field', help = 'Attribute ID field from which raster values will be assigned. 
Typically this will be "HydroID" for FIM2 and "feature_ID" for fim 1.', required = True) @@ -122,8 +119,7 @@ def fimx_to_fim3(catchments_path, raster_value_field, hand_raster_path, template parser.add_argument('-t', '--template-raster', help = 'Path to a template raster. Properties (CRS, resolution) of the template raster will be used to preprocess HAND and Catchments grids', required = True) parser.add_argument('-oh', '--out-hand-path', help = 'Path to the output HAND raster. Raster must be named "rem_clipped_zeroed_masked.tif', required = True) parser.add_argument('-oc', '--out-catchment-path', help = 'Path to the output Catchment raster. Raster must be named "gw_catchments_reaches_clipped_addedAttributes.tif"', required = True) - #Extract to dictionary and assign to variables. + # Extract to dictionary and assign to variables args = vars(parser.parse_args()) - #Run fimx to fim3 function. + # Run fimx to fim3 function fimx_to_fim3(**args) - diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py new file mode 100755 index 000000000..d2adeba08 --- /dev/null +++ b/tools/rating_curve_comparison.py @@ -0,0 +1,454 @@ +#!/usr/bin/env python3 + +import os +import sys +import pandas as pd +import numpy as np +import argparse +import matplotlib.pyplot as plt +import seaborn as sns +from functools import reduce +from multiprocessing import Pool +from os.path import isfile, join +import shutil +import warnings +from pathlib import Path +import time +warnings.simplefilter(action='ignore', category=FutureWarning) + +""" + Plot Rating Curves and Compare to USGS Gages + + Parameters + ---------- + fim_dir : str + Directory containing FIM output folders. + output_dir : str + Directory containing rating curve plots and tables. + usgs_gages_filename : str + File name of USGS rating curves. + nwm_flow_dir : str + Directory containing NWM recurrence flows files. + number_of_jobs : str + Number of jobs. + stat_groups : str + string of columns to group eval metrics. +""" + +def check_file_age(file): + ''' + Checks if file exists, determines the file age, and recommends + updating if older than 1 month. + + Returns + ------- + None. + + ''' + file = Path(file) + if file.is_file(): + modification_time = file.stat().st_mtime + current_time = time.time() + file_age_days = (current_time - modification_time)/86400 + if file_age_days > 30: + check = f'{file.name} is {int(file_age_days)} days old, consider updating.\nUpdate with rating_curve_get_usgs_curves.py' + else: + check = f'{file.name} is {int(file_age_days)} days old.' 
+ + return check + +# recurr_intervals = ['recurr_1_5_cms.csv','recurr_5_0_cms.csv','recurr_10_0_cms.csv'] + +def generate_rating_curve_metrics(args): + + elev_table_filename = args[0] + hydrotable_filename = args[1] + usgs_gages_filename = args[2] + usgs_recurr_stats_filename = args[3] + nwm_recurr_data_filename = args[4] + rc_comparison_plot_filename = args[5] + nwm_flow_dir = args[6] + catfim_flows_filename = args[7] + huc = args[8] + + elev_table = pd.read_csv(elev_table_filename,dtype={'location_id': str}) + hydrotable = pd.read_csv(hydrotable_filename,dtype={'HUC': str,'feature_id': str}) + usgs_gages = pd.read_csv(usgs_gages_filename,dtype={'location_id': str}) + + # Join rating curves with elevation data + hydrotable = hydrotable.merge(elev_table, on="HydroID") + relevant_gages = list(hydrotable.location_id.unique()) + usgs_gages = usgs_gages[usgs_gages['location_id'].isin(relevant_gages)] + usgs_gages = usgs_gages.reset_index(drop=True) + + if len(usgs_gages) > 0: + + # Adjust rating curve to elevation + hydrotable['elevation_ft'] = (hydrotable.stage + hydrotable.dem_adj_elevation) * 3.28084 # convert from m to ft + # hydrotable['raw_elevation_ft'] = (hydrotable.stage + hydrotable.dem_elevation) * 3.28084 # convert from m to ft + hydrotable['discharge_cfs'] = hydrotable.discharge_cms * 35.3147 + usgs_gages = usgs_gages.rename(columns={"flow": "discharge_cfs", "elevation_navd88": "elevation_ft"}) + + hydrotable['source'] = "FIM" + usgs_gages['source'] = "USGS" + limited_hydrotable = hydrotable.filter(items=['location_id','elevation_ft','discharge_cfs','source']) + select_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation_ft', 'discharge_cfs','source']) + + rating_curves = limited_hydrotable.append(select_usgs_gages) + + # Add stream order + stream_orders = hydrotable.filter(items=['location_id','str_order']).drop_duplicates() + rating_curves = rating_curves.merge(stream_orders, on='location_id') + rating_curves['str_order'] = rating_curves['str_order'].astype('int') + + # plot rating curves + generate_facet_plot(rating_curves, rc_comparison_plot_filename) + + # NWM recurr intervals + recurr_1_5_yr_filename = join(nwm_flow_dir,'recurr_1_5_cms.csv') + recurr_5_yr_filename = join(nwm_flow_dir,'recurr_5_0_cms.csv') + recurr_10_yr_filename = join(nwm_flow_dir,'recurr_10_0_cms.csv') + + # Update column names + recurr_1_5_yr = pd.read_csv(recurr_1_5_yr_filename,dtype={'feature_id': str}) + recurr_1_5_yr = recurr_1_5_yr.rename(columns={"discharge": "1.5"}) + recurr_5_yr = pd.read_csv(recurr_5_yr_filename,dtype={'feature_id': str}) + recurr_5_yr = recurr_5_yr.rename(columns={"discharge": "5.0"}) + recurr_10_yr = pd.read_csv(recurr_10_yr_filename,dtype={'feature_id': str}) + recurr_10_yr = recurr_10_yr.rename(columns={"discharge": "10.0"}) + + # Merge NWM recurr intervals into a single layer + nwm_recurr_intervals_all = reduce(lambda x,y: pd.merge(x,y, on='feature_id', how='outer'), [recurr_1_5_yr, recurr_5_yr, recurr_10_yr]) + nwm_recurr_intervals_all = pd.melt(nwm_recurr_intervals_all, id_vars=['feature_id'], value_vars=['1.5','5.0','10.0'], var_name='recurr_interval', value_name='discharge_cms') + + # Append catfim data (already set up in format similar to nwm_recurr_intervals_all) + cat_fim = pd.read_csv(catfim_flows_filename, dtype={'feature_id':str}) + nwm_recurr_intervals_all = nwm_recurr_intervals_all.append(cat_fim) + + # Convert discharge to cfs and filter + nwm_recurr_intervals_all['discharge_cfs'] = nwm_recurr_intervals_all.discharge_cms * 35.3147 + 
nwm_recurr_intervals_all = nwm_recurr_intervals_all.filter(items=['discharge_cfs', 'recurr_interval','feature_id']).drop_duplicates() + + # Identify unique gages + usgs_crosswalk = hydrotable.filter(items=['location_id', 'feature_id']).drop_duplicates() + + nwm_recurr_data_table = pd.DataFrame() + usgs_recurr_data = pd.DataFrame() + + # Interpolate USGS/FIM elevation at each gage + for index, gage in usgs_crosswalk.iterrows(): + + # Interpolate USGS elevation at NWM recurrence intervals + usgs_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.source=="USGS")] + + if len(usgs_rc) <1: + print(f"missing USGS rating curve data for usgs station {gage.location_id} in huc {huc}") + continue + + str_order = np.unique(usgs_rc.str_order).item() + feature_id = str(gage.feature_id) + + usgs_pred_elev = get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) + + # Handle sites missing data + if len(usgs_pred_elev) <1: + print(f"missing USGS elevation data for usgs station {gage.location_id} in huc {huc}") + continue + + # Clean up data + usgs_pred_elev['location_id'] = gage.location_id + usgs_pred_elev = usgs_pred_elev.filter(items=['location_id','recurr_interval', 'discharge_cfs','pred_elev']) + usgs_pred_elev = usgs_pred_elev.rename(columns={"pred_elev": "USGS"}) + + # Interpolate FIM elevation at NWM recurrence intervals + fim_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.source=="FIM")] + + if len(fim_rc) <1: + print(f"missing FIM rating curve data for usgs station {gage.location_id} in huc {huc}") + continue + + fim_pred_elev = get_reccur_intervals(fim_rc, usgs_crosswalk,nwm_recurr_intervals_all) + + # Handle sites missing data + if len(fim_pred_elev) <1: + print(f"missing FIM elevation data for usgs station {gage.location_id} in huc {huc}") + continue + + # Clean up data + fim_pred_elev = fim_pred_elev.rename(columns={"pred_elev": "FIM"}) + fim_pred_elev = fim_pred_elev.filter(items=['recurr_interval', 'discharge_cfs','FIM']) + usgs_pred_elev = usgs_pred_elev.merge(fim_pred_elev, on=['recurr_interval','discharge_cfs']) + + # Add attributes + usgs_pred_elev['HUC'] = huc + usgs_pred_elev['HUC4'] = huc[0:4] + usgs_pred_elev['str_order'] = str_order + usgs_pred_elev['feature_id'] = feature_id + + # Melt dataframe + usgs_pred_elev = pd.melt(usgs_pred_elev, id_vars=['location_id','feature_id','recurr_interval','discharge_cfs','HUC','HUC4','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') + nwm_recurr_data_table = nwm_recurr_data_table.append(usgs_pred_elev) + + # Interpolate FIM elevation at USGS observations + # fim_rc = fim_rc.merge(usgs_crosswalk, on="location_id") + # usgs_rc = usgs_rc.rename(columns={"elevation_ft": "USGS"}) + # + # # Sort stage in ascending order + # usgs_rc = usgs_rc.sort_values('USGS',ascending=True) + # + # # Interpolate FIM elevation at USGS observations + # usgs_rc['FIM'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation_ft'], left = np.nan, right = np.nan) + # usgs_rc = usgs_rc[usgs_rc['FIM'].notna()] + # usgs_rc = usgs_rc.drop(columns=["source"]) + # + # # Melt dataframe + # usgs_rc = pd.melt(usgs_rc, id_vars=['location_id','discharge_cfs','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') + # + # if not usgs_rc.empty: + # usgs_recurr_data = usgs_recurr_data.append(usgs_rc) + + # Generate stats for all sites in huc + # if not usgs_recurr_data.empty: + # usgs_recurr_stats_table 
= calculate_rc_stats_elev(usgs_recurr_data) + # usgs_recurr_stats_table.to_csv(usgs_recurr_stats_filename,index=False) + + # # Generate plots (not currently being used) + # fim_elev_at_USGS_rc_plot_filename = join(dirname(rc_comparison_plot_filename),'FIM_elevations_at_USGS_rc_' + str(huc) +'.png') + # generate_facet_plot(usgs_recurr_data, fim_elev_at_USGS_rc_plot_filename) + + if not nwm_recurr_data_table.empty: + nwm_recurr_data_table.discharge_cfs = np.round(nwm_recurr_data_table.discharge_cfs,2) + nwm_recurr_data_table.elevation_ft = np.round(nwm_recurr_data_table.elevation_ft,2) + nwm_recurr_data_table.to_csv(nwm_recurr_data_filename,index=False) + + else: + print(f"no USGS data for gage(s): {relevant_gages} in huc {huc}") + +def aggregate_metrics(output_dir,procs_list,stat_groups): + + # agg_usgs_interp_elev_stats = join(output_dir,'agg_usgs_interp_elev_stats.csv') + agg_nwm_recurr_flow_elev = join(output_dir,'agg_nwm_recurr_flow_elevations.csv') + agg_nwm_recurr_flow_elev_stats = join(output_dir,f"agg_nwm_recurr_flow_elev_stats_{'_'.join(stat_groups)}.csv") + + # if os.path.isfile(agg_usgs_interp_elev_stats): + # os.remove(agg_usgs_interp_elev_stats) + if os.path.isfile(agg_nwm_recurr_flow_elev): + os.remove(agg_nwm_recurr_flow_elev) + if os.path.isfile(agg_nwm_recurr_flow_elev_stats): + os.remove(agg_nwm_recurr_flow_elev_stats) + + for huc in procs_list: + # if os.path.isfile(huc[3]): + # usgs_recurr_stats = pd.read_csv(huc[3]) + # + # # Write/append usgs_recurr_stats + # if os.path.isfile(agg_usgs_interp_elev_stats): + # usgs_recurr_stats.to_csv(agg_usgs_interp_elev_stats,index=False, mode='a',header=False) + # else: + # usgs_recurr_stats.to_csv(agg_usgs_interp_elev_stats,index=False) + + if os.path.isfile(huc[4]): + nwm_recurr_data = pd.read_csv(huc[4],dtype={'location_id': str, + 'feature_id': str}) + + # Write/append nwm_recurr_data + if os.path.isfile(agg_nwm_recurr_flow_elev): + nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False, mode='a',header=False) + else: + nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False) + + agg_stats = pd.read_csv(agg_nwm_recurr_flow_elev,dtype={'location_id': str, + 'feature_id': str}) + + agg_recurr_stats_table = calculate_rc_stats_elev(agg_stats,stat_groups) + + agg_recurr_stats_table.to_csv(agg_nwm_recurr_flow_elev_stats,index=False) + + +def generate_facet_plot(rc, plot_filename): + + # Filter FIM elevation based on USGS data + for gage in rc.location_id.unique(): + + min_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.min() + max_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.max() + + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation_ft > (max_elev + 2))].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation_ft < min_elev - 2)].index) + + rc = rc.rename(columns={"location_id": "USGS Gage"}) + + ## Generate rating curve plots + num_plots = len(rc["USGS Gage"].unique()) + if num_plots > 3: + columns = num_plots // 3 + else: + columns = 1 + + sns.set(style="ticks") + g = sns.FacetGrid(rc, col="USGS Gage", hue="source", hue_order=['USGS','FIM'], sharex=False, sharey=False,col_wrap=columns) + g.map(sns.scatterplot, "discharge_cfs", "elevation_ft", palette="tab20c", marker="o") + g.set_axis_labels(x_var="Discharge (cfs)", y_var="Elevation (ft)") + + # Adjust the arrangement of the plots + g.fig.tight_layout(w_pad=1) + g.add_legend() + + plt.savefig(plot_filename) + plt.close() + + +def get_reccur_intervals(site_rc, 
usgs_crosswalk,nwm_recurr_intervals): + + usgs_site = site_rc.merge(usgs_crosswalk, on="location_id") + nwm_ids = len(usgs_site.feature_id.drop_duplicates()) + + if nwm_ids > 0: + + nwm_recurr_intervals = nwm_recurr_intervals.copy().loc[nwm_recurr_intervals.feature_id==usgs_site.feature_id.drop_duplicates().item()] + nwm_recurr_intervals['pred_elev'] = np.interp(nwm_recurr_intervals.discharge_cfs.values, usgs_site['discharge_cfs'], usgs_site['elevation_ft'], left = np.nan, right = np.nan) + + return nwm_recurr_intervals + + else: + return [] + + +def calculate_rc_stats_elev(rc,stat_groups=None): + + usgs_elev = "USGS" + src_elev = "FIM" + + # Collect any extra columns not associated with melt + col_index = list(rc.columns) + pivot_vars = ['source','elevation_ft'] + col_index = [col for col in col_index if col not in pivot_vars] + + # Unmelt elevation/source + rc_unmelt = (rc.set_index(col_index) + .pivot(columns="source")['elevation_ft'] + .reset_index() + .rename_axis(None, axis=1) + ) + + if stat_groups is None: + stat_groups = ['location_id'] + + # Calculate variables for NRMSE + rc_unmelt["yhat_minus_y"] = rc_unmelt[src_elev] - rc_unmelt[usgs_elev] + rc_unmelt["yhat_minus_y_squared"] = rc_unmelt["yhat_minus_y"] ** 2 + + # Calculate metrics by group + station_rc = rc_unmelt.groupby(stat_groups) + + # Calculate variables for NRMSE + sum_y_diff = station_rc.apply(lambda x: x["yhat_minus_y_squared"].sum())\ + .reset_index(stat_groups, drop = False).rename({0: "sum_y_diff"}, axis=1) + + # Determine number of events that are modeled + n = station_rc.apply(lambda x: x[usgs_elev].count())\ + .reset_index(stat_groups, drop = False).rename({0: "n"}, axis=1) + + # Determine the maximum/minimum USGS elevation + y_max = station_rc.apply(lambda x: x[usgs_elev].max())\ + .reset_index(stat_groups, drop = False).rename({0: "y_max"}, axis=1) + y_min = station_rc.apply(lambda x: x[usgs_elev].min())\ + .reset_index(stat_groups, drop = False).rename({0: "y_min"}, axis=1) + + # Collect variables for NRMSE + nrmse_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [sum_y_diff, n, y_max, y_min]) + nrmse_table_group = nrmse_table.groupby(stat_groups) + + # Calculate nrmse + nrmse = nrmse_table_group.apply(lambda x: ((x['sum_y_diff'] / x['n']) ** 0.5) / (x['y_max'] - x['y_min']))\ + .reset_index(stat_groups, drop = False).rename({0: "nrmse"}, axis=1) + + # Calculate Mean Absolute Depth Difference + mean_abs_y_diff = station_rc.apply(lambda x: (abs(x["yhat_minus_y"]).mean()))\ + .reset_index(stat_groups, drop = False).rename({0: "mean_abs_y_diff_ft"}, axis=1) + + # Calculate Percent Bias + percent_bias = station_rc.apply(lambda x: 100 * (x["yhat_minus_y"].sum() / x[usgs_elev].sum()))\ + .reset_index(stat_groups, drop = False).rename({0: "percent_bias"}, axis=1) + + rc_stat_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [nrmse, mean_abs_y_diff, percent_bias]) + + return rc_stat_table + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='generate rating curve plots and tables for FIM and USGS gages') + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) + parser.add_argument('-output_dir','--output-dir', help='rating curves output folder', required=True,type=str) + parser.add_argument('-gages','--usgs-gages-filename',help='USGS rating curves',required=True,type=str) + parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True,type=str) + parser.add_argument('-catfim', 
'--catfim-flows-filename', help='Categorical FIM flows file',required = True,type=str) + parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) + parser.add_argument('-group','--stat-groups',help='column(s) to group stats',required=False,type=str) + + args = vars(parser.parse_args()) + + fim_dir = args['fim_dir'] + output_dir = args['output_dir'] + usgs_gages_filename = args['usgs_gages_filename'] + nwm_flow_dir = args['nwm_flow_dir'] + catfim_flows_filename = args['catfim_flows_filename'] + number_of_jobs = args['number_of_jobs'] + stat_groups = args['stat_groups'] + + stat_groups = stat_groups.split() + procs_list = [] + + plots_dir = join(output_dir,'plots') + os.makedirs(plots_dir, exist_ok=True) + tables_dir = join(output_dir,'tables') + os.makedirs(tables_dir, exist_ok=True) + + #Check age of gages csv and recommend updating if older than 30 days. + print(check_file_age(usgs_gages_filename)) + + # Open log file + sys.__stdout__ = sys.stdout + log_file = open(join(output_dir,'rating_curve_comparison.log'),"w") + sys.stdout = log_file + + merged_elev_table = [] + huc_list = os.listdir(fim_dir) + for huc in huc_list: + + if huc != 'logs': + elev_table_filename = join(fim_dir,huc,'usgs_elev_table.csv') + hydrotable_filename = join(fim_dir,huc,'hydroTable.csv') + usgs_recurr_stats_filename = join(tables_dir,f"usgs_interpolated_elevation_stats_{huc}.csv") + nwm_recurr_data_filename = join(tables_dir,f"nwm_recurrence_flow_elevations_{huc}.csv") + rc_comparison_plot_filename = join(plots_dir,f"FIM-USGS_rating_curve_comparison_{huc}.png") + + if isfile(elev_table_filename): + procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir, catfim_flows_filename, huc]) + # Aggregate all of the individual huc elev_tables into one aggregate for accessing all data in one csv + read_elev_table = pd.read_csv(elev_table_filename) + read_elev_table['huc'] = huc + merged_elev_table.append(read_elev_table) + + # Output a concatenated elev_table to_csv + if merged_elev_table: + print(f"Creating aggregate elev table csv") + concat_elev_table = pd.concat(merged_elev_table) + concat_elev_table['thal_burn_depth_meters'] = concat_elev_table['dem_elevation'] - concat_elev_table['dem_adj_elevation'] + concat_elev_table.to_csv(join(output_dir,'agg_usgs_elev_table.csv'),index=False) + + # Initiate multiprocessing + print(f"Generating rating curve metrics for {len(procs_list)} hucs using {number_of_jobs} jobs") + with Pool(processes=number_of_jobs) as pool: + pool.map(generate_rating_curve_metrics, procs_list) + + print(f"Aggregating rating curve metrics for {len(procs_list)} hucs") + aggregate_metrics(output_dir,procs_list,stat_groups) + + print('Delete intermediate tables') + shutil.rmtree(tables_dir, ignore_errors=True) + + # Close log file + sys.stdout = sys.__stdout__ + log_file.close() diff --git a/tools/rating_curve_get_usgs_curves.py b/tools/rating_curve_get_usgs_curves.py new file mode 100644 index 000000000..cb8a33f56 --- /dev/null +++ b/tools/rating_curve_get_usgs_curves.py @@ -0,0 +1,370 @@ +#!/usr/bin/env python3 +import time +import pandas as pd +import geopandas as gpd +from pathlib import Path +from tools_shared_functions import get_metadata, get_datum, ngvd_to_navd_ft, get_rating_curve, aggregate_wbd_hucs, get_thresholds, flow_data +from dotenv import load_dotenv +import os +import argparse +import sys +sys.path.append('/foss_fim/src') 
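+# Note: sys.path is extended above so that the shared 'utils' package under /foss_fim/src resolves when this
+# script is run directly; that location is assumed to match the container layout used by this project.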
+from utils.shared_variables import PREP_PROJECTION + +''' +This script calls the NOAA Tidal API for datum conversions. Experience shows that +running script outside of business hours seems to be most consistent way +to avoid API errors. Currently configured to get rating curve data within +CONUS. Tidal API call may need to be modified to get datum conversions for +AK, HI, PR/VI. +''' + +#import variables from .env file +load_dotenv() +API_BASE_URL = os.getenv("API_BASE_URL") +WBD_LAYER = os.getenv("WBD_LAYER") +EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") +NWM_FLOWS_MS = os.getenv("NWM_FLOWS_MS") + +def get_all_active_usgs_sites(): + ''' + Compile a list of all active usgs gage sites that meet certain criteria. + Return a GeoDataFrame of all sites. + + Returns + ------- + None. + + ''' + #Get metadata for all usgs_site_codes that are active in the U.S. + metadata_url = f'{API_BASE_URL}/metadata' + #Define arguments to retrieve metadata and then get metadata from WRDS + select_by = 'usgs_site_code' + selector = ['all'] + must_include = 'usgs_data.active' + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = None ) + + #Filter out sites based quality of site. These acceptable codes were initially + #decided upon and may need fine tuning. A link where more information + #regarding the USGS attributes is provided. + + #https://help.waterdata.usgs.gov/code/coord_acy_cd_query?fmt=html + acceptable_coord_acc_code = ['H','1','5','S','R','B','C','D','E'] + #https://help.waterdata.usgs.gov/code/coord_meth_cd_query?fmt=html + acceptable_coord_method_code = ['C','D','W','X','Y','Z','N','M','L','G','R','F','S'] + #https://help.waterdata.usgs.gov/codes-and-parameters/codes#SI + acceptable_alt_acc_thresh = 1 + #https://help.waterdata.usgs.gov/code/alt_meth_cd_query?fmt=html + acceptable_alt_meth_code = ['A','D','F','I','J','L','N','R','W','X','Y','Z'] + #https://help.waterdata.usgs.gov/code/site_tp_query?fmt=html + acceptable_site_type = ['ST'] + + #Cycle through each site and filter out if site doesn't meet criteria. + acceptable_sites_metadata = [] + for metadata in metadata_list: + #Get the usgs info from each site + usgs_data = metadata['usgs_data'] + + #Get site quality attributes + coord_accuracy_code = usgs_data.get('coord_accuracy_code') + coord_method_code = usgs_data.get('coord_method_code') + alt_accuracy_code = usgs_data.get('alt_accuracy_code') + alt_method_code = usgs_data.get('alt_method_code') + site_type = usgs_data.get('site_type') + + #Check to make sure that none of the codes were null, if null values are found, skip to next. + if not all([coord_accuracy_code, coord_method_code, alt_accuracy_code, alt_method_code, site_type]): + continue + + #Test if site meets criteria. + if (coord_accuracy_code in acceptable_coord_acc_code and + coord_method_code in acceptable_coord_method_code and + alt_accuracy_code <= acceptable_alt_acc_thresh and + alt_method_code in acceptable_alt_meth_code and + site_type in acceptable_site_type): + + #If nws_lid is not populated then add a dummy ID so that 'aggregate_wbd_hucs' works correctly. + if not metadata.get('identifiers').get('nws_lid'): + metadata['identifiers']['nws_lid'] = 'Bogus_ID' + + #Append metadata of acceptable site to acceptable_sites list. 
+ acceptable_sites_metadata.append(metadata) + + #Get a geospatial layer (gdf) for all acceptable sites + dictionary, gdf = aggregate_wbd_hucs(acceptable_sites_metadata, Path(WBD_LAYER), retain_attributes = False) + #Get a list of all sites in gdf + list_of_sites = gdf['identifiers_usgs_site_code'].to_list() + #Rename gdf fields + gdf.columns = gdf.columns.str.replace('identifiers_','') + + return gdf, list_of_sites, acceptable_sites_metadata + +############################################################################## +#Generate categorical flows for each category across all sites. +############################################################################## +def write_categorical_flow_files(metadata, workspace): + ''' + Writes flow files of each category for every feature_id in the input metadata. + Written to supply input flow files of all gage sites for each flood category. + + Parameters + ---------- + metadata : DICT + Dictionary of metadata from WRDS (e.g. output from get_all_active_usgs_sites). + workspace : STR + Path to workspace where flow files will be saved. + + Returns + ------- + None. + + ''' + + threshold_url = f'{API_BASE_URL}/nws_threshold' + workspace = Path(workspace) + workspace.mkdir(parents = True, exist_ok = True) + #For each site in metadata + all_data = pd.DataFrame() + + for site in metadata: + #Get the feature_id and usgs_site_code + feature_id = site.get('identifiers').get('nwm_feature_id') + usgs_code = site.get('identifiers').get('usgs_site_code') + nws_lid = site.get('identifiers').get('nws_lid') + + #thresholds only provided for valid nws_lid. + if nws_lid == 'Bogus_ID': + continue + + #if invalid feature_id skip to next site + if feature_id is None: + continue + + #Get the stages and flows + stages, flows = get_thresholds(threshold_url, select_by = 'nws_lid', selector = nws_lid, threshold = 'all') + + #For each flood category + for category in ['action','minor','moderate','major']: + #Get flow + flow = flows.get(category, None) + #If flow or feature id are not valid, skip to next site + if flow is None: + continue + #Otherwise, write 'guts' of a flow file and append to a master DataFrame. + else: + data = flow_data([feature_id], flow, convert_to_cms = True) + data['recurr_interval'] = category + data['nws_lid'] = nws_lid + data['location_id'] = usgs_code + data = data.rename(columns = {'discharge':'discharge_cms'}) + #Append site data to master DataFrame + all_data = all_data.append(data, ignore_index = True) + + #Write CatFIM flows to file + final_data = all_data[['feature_id','discharge_cms', 'recurr_interval']] + final_data.to_csv(workspace / f'catfim_flows_cms.csv', index = False) + return all_data +############################################################################### + +def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): + ''' + + Returns rating curves, for a set of sites, adjusted to elevation NAVD. + Currently configured to get rating curve data within CONUS. Tidal API + call may need to be modified to get datum conversions for AK, HI, PR/VI. + Workflow as follows: + 1a. If 'all' option passed, get metadata for all acceptable USGS sites in CONUS. + 1b. If a list of sites passed, get metadata for all sites supplied by user. + 2. Extract datum information for each site. + 3. If site is not in contiguous US skip (due to issue with datum conversions) + 4. Convert datum if NGVD + 5. Get rating curve for each site individually + 6. Convert rating curve to absolute elevation (NAVD) and store in DataFrame + 7. 
Append all rating curves to a master DataFrame. + + + Outputs, if a workspace is specified, are: + usgs_rating_curves.csv -- A csv containing USGS rating curve as well + as datum adjustment and rating curve expressed as an elevation (NAVD88). + ONLY SITES IN CONUS ARE CURRENTLY LISTED IN THIS CSV. To get + additional sites, the Tidal API will need to be reconfigured and tested. + + log.csv -- A csv containing runtime messages. + + (if all option passed) usgs_gages.gpkg -- a point layer containing ALL USGS gage sites that meet + certain criteria. In the attribute table is a 'curve' column that will indicate if a rating + curve is provided in "usgs_rating_curves.csv" + + Parameters + ---------- + list_of_gage_sites : LIST + List of all gage site IDs. If all acceptable sites in CONUS are desired + list_of_gage_sites can be passed 'all' and it will use the get_all_active_usgs_sites + function to filter out sites that meet certain requirements across CONUS. + + workspace : STR + Directory, if specified, where output csv is saved. OPTIONAL, Default is False. + + sleep_time: FLOAT + Amount of time to rest between API calls. The Tidal API appears to + error out more during business hours. Increasing sleep_time may help. + + + Returns + ------- + all_rating_curves : Pandas DataFrame + DataFrame containing USGS rating curves adjusted to elevation for + all input sites. Additional metadata also contained in DataFrame + + ''' + #Define URLs for metadata and rating curve + metadata_url = f'{API_BASE_URL}/metadata' + rating_curve_url = f'{API_BASE_URL}/rating_curve' + + #If 'all' option passed to list of gages sites, it retrieves all acceptable sites within CONUS. + print('getting metadata for all sites') + if list_of_gage_sites == ['all']: + acceptable_sites_gdf, acceptable_sites_list, metadata_list = get_all_active_usgs_sites() + #Otherwise, if a list of sites is passed, retrieve sites from WRDS. + else: + #Define arguments to retrieve metadata and then get metadata from WRDS + select_by = 'usgs_site_code' + selector = list_of_gage_sites + #Since there is a limit to number characters in url, split up selector if too many sites. + max_sites = 150 + if len(selector)>max_sites: + chunks = [selector[i:i+max_sites] for i in range(0,len(selector),max_sites)] + #Get metadata for each chunk + metadata_list = [] + metadata_df = pd.DataFrame() + for chunk in chunks: + chunk_list, chunk_df = get_metadata(metadata_url, select_by, chunk, must_include = None, upstream_trace_distance = None, downstream_trace_distance = None ) + #Append chunk data to metadata_list/df + metadata_list.extend(chunk_list) + metadata_df = metadata_df.append(chunk_df) + else: + #If selector has less than max sites, then get metadata. + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector, must_include = None, upstream_trace_distance = None, downstream_trace_distance = None ) + + #Create DataFrame to store all appended rating curves + print('processing metadata') + all_rating_curves = pd.DataFrame() + regular_messages = [] + api_failure_messages=[] + #For each site in metadata_list + for metadata in metadata_list: + + #Get datum information for site (only need usgs_data) + nws, usgs = get_datum(metadata) + + #Filter out sites that are not in contiguous US. 
If this section is removed be sure to test with datum adjustment section (region will need changed) + if usgs['state'] in ['Alaska', 'Puerto Rico', 'Virgin Islands', 'Hawaii']: + continue + + #Get rating curve for site + location_ids = usgs['usgs_site_code'] + curve = get_rating_curve(rating_curve_url, location_ids = [location_ids]) + #If no rating curve was returned, skip site. + if curve.empty: + message = f'{location_ids}: has no rating curve' + regular_messages.append(message) + continue + + #Adjust datum to NAVD88 if needed. If datum unknown, skip site. + if usgs['vcs'] == 'NGVD29': + #To prevent time-out errors + time.sleep(sleep_time) + #Get the datum adjustment to convert NGVD to NAVD. Region needs changed if not in CONUS. + datum_adj_ft = ngvd_to_navd_ft(datum_info = usgs, region = 'contiguous') + + #If datum API failed, print message and skip site. + if datum_adj_ft is None: + api_message = f"{location_ids}: datum adjustment failed!!" + api_failure_messages.append(api_message) + print(api_message) + continue + + #If datum adjustment succeeded, calculate datum in NAVD88 + navd88_datum = round(usgs['datum'] + datum_adj_ft, 2) + message = f'{location_ids}:succesfully converted NGVD29 to NAVD88' + regular_messages.append(message) + + elif usgs['vcs'] == 'NAVD88': + navd88_datum = usgs['datum'] + message = f'{location_ids}: already NAVD88' + regular_messages.append(message) + + else: + message = f"{location_ids}: datum unknown" + regular_messages.append(message) + continue + + #Populate rating curve with metadata and use navd88 datum to convert stage to elevation. + curve['active'] = usgs['active'] + curve['datum'] = usgs['datum'] + curve['datum_vcs'] = usgs['vcs'] + curve['navd88_datum'] = navd88_datum + curve['elevation_navd88'] = curve['stage'] + navd88_datum + #Append all rating curves to a dataframe + all_rating_curves = all_rating_curves.append(curve) + + #Rename columns and add attribute indicating if rating curve exists + acceptable_sites_gdf.rename(columns = {'nwm_feature_id':'feature_id','usgs_site_code':'location_id'}, inplace = True) + sites_with_data = pd.DataFrame({'location_id':all_rating_curves['location_id'].unique(),'curve':'yes'}) + acceptable_sites_gdf = acceptable_sites_gdf.merge(sites_with_data, on = 'location_id', how = 'left') + acceptable_sites_gdf.fillna({'curve':'no'},inplace = True) + #Add mainstems attribute to acceptable sites + print('Attributing mainstems sites') + #Import mainstems segments used in run_by_unit.sh + ms_df = gpd.read_file(NWM_FLOWS_MS) + ms_segs = ms_df.ID.astype(str).to_list() + #Populate mainstems attribute field + acceptable_sites_gdf['mainstem'] = 'no' + acceptable_sites_gdf.loc[acceptable_sites_gdf.eval('feature_id in @ms_segs'),'mainstem'] = 'yes' + + + #If workspace is specified, write data to file. + if workspace: + #Write rating curve dataframe to file + Path(workspace).mkdir(parents = True, exist_ok = True) + all_rating_curves.to_csv(Path(workspace) / 'usgs_rating_curves.csv', index = False) + #Save out messages to file. + first_line = [f'THERE WERE {len(api_failure_messages)} SITES THAT EXPERIENCED DATUM CONVERSION ISSUES'] + api_failure_messages = first_line + api_failure_messages + regular_messages = api_failure_messages + regular_messages + all_messages = pd.DataFrame({'Messages':regular_messages}) + all_messages.to_csv(Path(workspace) / 'log.csv', index = False) + #If 'all' option specified, reproject then write out shapefile of acceptable sites. 
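+        #(The gage layer is written as a GeoPackage, 'usgs_gages.gpkg', reprojected to PREP_PROJECTION, rather than a shapefile.)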
+ if list_of_gage_sites == ['all']: + acceptable_sites_gdf = acceptable_sites_gdf.to_crs(PREP_PROJECTION) + acceptable_sites_gdf.to_file(Path(workspace) / 'usgs_gages.gpkg', layer = 'usgs_gages', driver = 'GPKG') + + #Write out flow files for each threshold across all sites + all_data = write_categorical_flow_files(metadata_list, workspace) + + return all_rating_curves + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Retrieve USGS rating curves adjusted to elevation (NAVD88).\nCurrently configured to get rating curves within CONUS.\nRecommend running outside of business hours to reduce API related errors.\nIf error occurs try increasing sleep time (from default of 1).') + parser.add_argument('-l', '--list_of_gage_sites', help = '"all" for all active usgs sites, specify individual sites separated by space, or provide a csv of sites (one per line).', nargs = '+', required = True) + parser.add_argument('-w', '--workspace', help = 'Directory where all outputs will be stored.', default = False, required = False) + parser.add_argument('-t', '--sleep_timer', help = 'How long to rest between datum API calls', default = 1.0, required = False) + + #Extract to dictionary and assign to variables. + args = vars(parser.parse_args()) + + #Check if csv is supplied + if args['list_of_gage_sites'][0].endswith('.csv'): + #Convert csv list to python list + with open(args['list_of_gage_sites']) as f: + sites = f.read().splitlines() + args['list_of_gage_sites'] = sites + + l = args['list_of_gage_sites'] + w = args['workspace'] + t = float(args['sleep_timer']) + + #Generate USGS rating curves + usgs_rating_to_elev(list_of_gage_sites = l, workspace=w, sleep_time = t) + \ No newline at end of file diff --git a/tools/run_test_case.py b/tools/run_test_case.py new file mode 100755 index 000000000..9d4870565 --- /dev/null +++ b/tools/run_test_case.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 + +import os +import sys +import shutil +import argparse + +from tools_shared_functions import compute_contingency_stats_from_rasters +from tools_shared_variables import (TEST_CASES_DIR, INPUTS_DIR, ENDC, TRED_BOLD, WHITE_BOLD, CYAN_BOLD, AHPS_BENCHMARK_CATEGORIES) +from inundation import inundate + +def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=False, archive_results=False, mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False, overwrite=True): + + benchmark_category = test_id.split('_')[1] # Parse benchmark_category from test_id. + current_huc = test_id.split('_')[0] # Break off HUC ID and assign to variable. + + # Construct paths to development test results if not existent. + if archive_results: + version_test_case_dir_parent = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', test_id, 'official_versions', version) + else: + version_test_case_dir_parent = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', test_id, 'testing_versions', version) + + # Delete the entire directory if it already exists. + if os.path.exists(version_test_case_dir_parent): + if overwrite == True: + shutil.rmtree(version_test_case_dir_parent) + else: + print("Metrics for ({version}: {test_id}) already exist. 
Use overwrite flag (-o) to overwrite metrics.".format(version=version, test_id=test_id)) + return + + os.mkdir(version_test_case_dir_parent) + + print("Running the alpha test for test_id: " + test_id + ", " + version + "...") + stats_modes_list = ['total_area'] + + fim_run_parent = os.path.join(os.environ['outputDataDir'], fim_run_dir) + assert os.path.exists(fim_run_parent), "Cannot locate " + fim_run_parent + + # Create paths to fim_run outputs for use in inundate(). + rem = os.path.join(fim_run_parent, 'rem_zeroed_masked.tif') + if not os.path.exists(rem): + rem = os.path.join(fim_run_parent, 'rem_clipped_zeroed_masked.tif') + catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes.tif') + if not os.path.exists(catchments): + catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_clipped_addedAttributes.tif') + if mask_type == 'huc': + catchment_poly = '' + else: + catchment_poly = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') + hydro_table = os.path.join(fim_run_parent, 'hydroTable.csv') + + # Map necessary inputs for inundation(). + hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' + + # Create list of shapefile paths to use as exclusion areas. + zones_dir = os.path.join(TEST_CASES_DIR, 'other', 'zones') + mask_dict = {'levees': + {'path': os.path.join(zones_dir, 'leveed_areas_conus.shp'), + 'buffer': None, + 'operation': 'exclude' + }, + 'waterbodies': + {'path': os.path.join(zones_dir, 'nwm_v2_reservoirs.shp'), + 'buffer': None, + 'operation': 'exclude', + }, + } + + if inclusion_area != '': + inclusion_area_name = os.path.split(inclusion_area)[1].split('.')[0] # Get layer name + mask_dict.update({inclusion_area_name: {'path': inclusion_area, + 'buffer': int(inclusion_area_buffer), + 'operation': 'include'}}) + # Append the concatenated inclusion_area_name and buffer. + if inclusion_area_buffer == None: + inclusion_area_buffer = 0 + stats_modes_list.append(inclusion_area_name + '_b' + str(inclusion_area_buffer) + 'm') + + # Check if magnitude is list of magnitudes or single value. + magnitude_list = magnitude + if type(magnitude_list) != list: + magnitude_list = [magnitude_list] + + + # Get path to validation_data_{benchmark} directory and huc_dir. + validation_data_path = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category) + for magnitude in magnitude_list: + version_test_case_dir = os.path.join(version_test_case_dir_parent, magnitude) + if not os.path.exists(version_test_case_dir): + os.mkdir(version_test_case_dir) + # Construct path to validation raster and forecast file. + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + benchmark_raster_path_list, forecast_list = [], [] + lid_dir_list = os.listdir(os.path.join(validation_data_path, current_huc)) + lid_list, inundation_raster_list, domain_file_list = [], [], [] + + for lid in lid_dir_list: + lid_dir = os.path.join(validation_data_path, current_huc, lid) + benchmark_lid_raster_path = os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_extent_' + magnitude + '.tif') + + # Only compare if the benchmark data exist. 
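+                    # Expected benchmark naming (from the path built above): ahps_<lid>_huc_<huc>_extent_<magnitude>.tif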
+ if os.path.exists(benchmark_lid_raster_path): + benchmark_raster_path_list.append(benchmark_lid_raster_path) # TEMP + forecast_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_flows_' + magnitude + '.csv')) # TEMP + lid_list.append(lid) + inundation_raster_list.append(os.path.join(version_test_case_dir, lid + '_inundation_extent.tif')) + domain_file_list.append(os.path.join(lid_dir, lid + '_domain.shp')) + + else: + benchmark_raster_file = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_extent_' + magnitude + '.tif') + benchmark_raster_path_list = [benchmark_raster_file] + forecast_path = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_flows_' + magnitude + '.csv') + forecast_list = [forecast_path] + inundation_raster_list = [os.path.join(version_test_case_dir, 'inundation_extent.tif')] + + for index in range(0, len(benchmark_raster_path_list)): + benchmark_raster_path = benchmark_raster_path_list[index] + forecast = forecast_list[index] + inundation_raster = inundation_raster_list[index] + # Only need to define ahps_lid and ahps_extent_file for AHPS_BENCHMARK_CATEGORIES. + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + ahps_lid = lid_list[index] + ahps_domain_file = domain_file_list[index] + mask_dict.update({ahps_lid: + {'path': ahps_domain_file, + 'buffer': None, + 'operation': 'include'} + }) + + + if not os.path.exists(benchmark_raster_path) or not os.path.exists(ahps_domain_file) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. + continue + else: # If not in AHPS_BENCHMARK_CATEGORIES. + if not os.path.exists(benchmark_raster_path) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. + continue + # Run inundate. +# print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...") + try: + inundate_test = inundate( + rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, + depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True + ) + if inundate_test == 0: +# print("-----> Inundation mapping complete.") + predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. + + # Define outputs for agreement_raster, stats_json, and stats_csv. 
+ if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, lid + 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') + else: + agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') + + compute_contingency_stats_from_rasters(predicted_raster_path, + benchmark_raster_path, + agreement_raster, + stats_csv=stats_csv, + stats_json=stats_json, + mask_values=[], + stats_modes_list=stats_modes_list, + test_id=test_id, + mask_dict=mask_dict, + ) + + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + del mask_dict[ahps_lid] + + print(" ") + print("Evaluation metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC) + print(" ") + elif inundate_test == 1: + pass + print (f"No matching feature IDs between forecast and hydrotable for magnitude: {magnitude}") + #return + except Exception as e: + print(e) + + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + # -- Delete temp files -- # + # List all files in the output directory. + output_file_list = os.listdir(version_test_case_dir) + for output_file in output_file_list: + if "total_area" in output_file: + full_output_file_path = os.path.join(version_test_case_dir, output_file) + os.remove(full_output_file_path) + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Inundation mapping and regression analysis for FOSS FIM. Regression analysis results are stored in the test directory.') + parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) + parser.add_argument('-b', '--version',help='The name of the working version in which features are being tested',required=True,default="") + parser.add_argument('-t', '--test-id',help='The test_id to use. Format as: HUC_BENCHMARKTYPE, e.g. 12345678_ble.',required=True,default="") + parser.add_argument('-m', '--mask-type', help='Specify \'huc\' (FIM < 3) or \'filter\' (FIM >= 3) masking method', required=False,default="huc") + parser.add_argument('-y', '--magnitude',help='The magnitude to run.',required=False, default="") + parser.add_argument('-c', '--compare-to-previous', help='Compare to previous versions of HAND.', required=False,action='store_true') + parser.add_argument('-a', '--archive-results', help='Automatically copy results to the "previous_version" archive for test_id. For admin use only.', required=False,action='store_true') + parser.add_argument('-i', '--inclusion-area', help='Path to shapefile. Contingency metrics will be produced from pixels inside of shapefile extent.', required=False, default="") + parser.add_argument('-ib','--inclusion-area-buffer', help='Buffer to use when masking contingency metrics with inclusion area.', required=False, default="0") + parser.add_argument('-l', '--light-run', help='Using the light_run option will result in only stat files being written, and NOT grid files.', required=False, action='store_true') + parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, default=False, action='store_true') + + # Extract to dictionary and assign to variables. 
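+    # Illustrative invocation (names, paths, and IDs are examples only):
+    #   python3 run_test_case.py -r my_fim_version/12345678 -b my_dev_version -t 12345678_ble -y 100yr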
+ args = vars(parser.parse_args()) + + valid_test_id_list = os.listdir(TEST_CASES_DIR) + + exit_flag = False # Default to False. + print() + + # Ensure test_id is valid. +# if args['test_id'] not in valid_test_id_list: +# print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided test_id (-t) " + CYAN_BOLD + args['test_id'] + WHITE_BOLD + " is not available." + ENDC) +# print(WHITE_BOLD + "Available test_ids include: " + ENDC) +# for test_id in valid_test_id_list: +# if 'validation' not in test_id.split('_') and 'ble' in test_id.split('_'): +# print(CYAN_BOLD + test_id + ENDC) +# print() +# exit_flag = True + + # Ensure fim_run_dir exists. + if not os.path.exists(os.path.join(os.environ['outputDataDir'], args['fim_run_dir'])): + print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided fim_run_dir (-r) " + CYAN_BOLD + args['fim_run_dir'] + WHITE_BOLD + " could not be located in the 'outputs' directory." + ENDC) + print(WHITE_BOLD + "Please provide the parent directory name for fim_run.sh outputs. These outputs are usually written in a subdirectory, e.g. outputs/123456/123456." + ENDC) + print() + exit_flag = True + + # Ensure inclusion_area path exists. + if args['inclusion_area'] != "" and not os.path.exists(args['inclusion_area']): + print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided inclusion_area (-i) " + CYAN_BOLD + args['inclusion_area'] + WHITE_BOLD + " could not be located." + ENDC) + exit_flag = True + + try: + inclusion_buffer = int(args['inclusion_area_buffer']) + except ValueError: + print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided inclusion_area_buffer (-ib) " + CYAN_BOLD + args['inclusion_area_buffer'] + WHITE_BOLD + " is not a round number." + ENDC) + + if args['magnitude'] == '': + if 'ble' in args['test_id'].split('_'): + args['magnitude'] = ['100yr', '500yr'] + elif 'nws' or 'usgs' in args['test_id'].split('_'): + args['magnitude'] = ['action', 'minor', 'moderate', 'major'] + else: + print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided magnitude (-y) " + CYAN_BOLD + args['magnitude'] + WHITE_BOLD + " is invalid. ble options include: 100yr, 500yr. ahps options include action, minor, moderate, major." + ENDC) + exit_flag = True + + if exit_flag: + print() + sys.exit() + + else: + run_alpha_test(**args) diff --git a/tools/synthesize_test_cases.py b/tools/synthesize_test_cases.py new file mode 100755 index 000000000..e49b1519d --- /dev/null +++ b/tools/synthesize_test_cases.py @@ -0,0 +1,357 @@ +#!/usr/bin/env python3 + +import os +import argparse +from multiprocessing import Pool +import json +import csv + +from run_test_case import run_alpha_test +from tools_shared_variables import TEST_CASES_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES, MAGNITUDE_DICT + + +def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include_list): + """ + This function searches for and collates metrics into a single CSV file that can queried database-style. The + CSV is an input to eval_plots.py. This function automatically looks for metrics produced for official versions + and loads them into memory to be written to the output CSV. + + Args: + master_metrics_csv_output (str): Full path to CSV output. If a file already exists at this path, it will be overwritten. + dev_versions_to_include_list (list): A list of non-official FIM version names. If a user supplied information on the command + line using the -dc flag, then this function will search for metrics in the "testing_versions" + library of metrics and include them in the CSV output. 
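+
+    Returns:
+        None. The collated metrics are written to the CSV at master_metrics_csv_output.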
+ + """ + + # Construct header + metrics_to_write = ['true_negatives_count', + 'false_negatives_count', + 'true_positives_count', + 'false_positives_count', + 'contingency_tot_count', + 'cell_area_m2', + 'TP_area_km2', + 'FP_area_km2', + 'TN_area_km2', + 'FN_area_km2', + 'contingency_tot_area_km2', + 'predPositive_area_km2', + 'predNegative_area_km2', + 'obsPositive_area_km2', + 'obsNegative_area_km2', + 'positiveDiff_area_km2', + 'CSI', + 'FAR', + 'TPR', + 'TNR', + 'PPV', + 'NPV', + 'ACC', + 'Bal_ACC', + 'MCC', + 'EQUITABLE_THREAT_SCORE', + 'PREVALENCE', + 'BIAS', + 'F1_SCORE', + 'TP_perc', + 'FP_perc', + 'TN_perc', + 'FN_perc', + 'predPositive_perc', + 'predNegative_perc', + 'obsPositive_perc', + 'obsNegative_perc', + 'positiveDiff_perc', + 'masked_count', + 'masked_perc', + 'masked_area_km2' + ] + + additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] + list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source'] + ['extent_config'] + ["calibrated"]] + + versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) + + if len(dev_versions_to_include_list) > 0: + iteration_list = ['official', 'comparison'] + else: + iteration_list = ['official'] + + for benchmark_source in ['ble', 'nws', 'usgs', 'ifc']: + benchmark_test_case_dir = os.path.join(TEST_CASES_DIR, benchmark_source + '_test_cases') + if benchmark_source in ['ble', 'ifc']: + + if benchmark_source == 'ble': + magnitude_list = MAGNITUDE_DICT['ble'] + if benchmark_source == 'ifc': + magnitude_list = MAGNITUDE_DICT['ifc'] + test_cases_list = os.listdir(benchmark_test_case_dir) + + for test_case in test_cases_list: + try: + int(test_case.split('_')[0]) + + huc = test_case.split('_')[0] + + for iteration in iteration_list: + + if iteration == "official": + versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') + versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) + if iteration == "comparison": + versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'testing_versions') + versions_to_aggregate = dev_versions_to_include_list + + for magnitude in magnitude_list: + for version in versions_to_aggregate: + if '_fr' in version: + extent_config = 'FR' + elif '_ms' in version: + extent_config = 'MS' + else: + extent_config = 'FR' + if "_c" in version and version.split('_c')[1] == "": + calibrated = "yes" + else: + calibrated = "no" + version_dir = os.path.join(versions_to_crawl, version) + magnitude_dir = os.path.join(version_dir, magnitude) + + if os.path.exists(magnitude_dir): + magnitude_dir_list = os.listdir(magnitude_dir) + for f in magnitude_dir_list: + if '.json' in f: + flow = 'NA' + nws_lid = "NA" + sub_list_to_append = [version, nws_lid, magnitude, huc] + full_json_path = os.path.join(magnitude_dir, f) + if os.path.exists(full_json_path): + stats_dict = json.load(open(full_json_path)) + for metric in metrics_to_write: + sub_list_to_append.append(stats_dict[metric]) + sub_list_to_append.append(full_json_path) + sub_list_to_append.append(flow) + sub_list_to_append.append(benchmark_source) + sub_list_to_append.append(extent_config) + sub_list_to_append.append(calibrated) + + list_to_write.append(sub_list_to_append) + except ValueError: + pass + + if benchmark_source in AHPS_BENCHMARK_CATEGORIES: + test_cases_list = os.listdir(benchmark_test_case_dir) + + for test_case in test_cases_list: + try: + int(test_case.split('_')[0]) + + huc = test_case.split('_')[0] + + for iteration in iteration_list: + + if iteration == 
"official": + versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') + versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) + if iteration == "comparison": + versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'testing_versions') + versions_to_aggregate = dev_versions_to_include_list + + for magnitude in ['action', 'minor', 'moderate', 'major']: + for version in versions_to_aggregate: + if '_fr' in version: + extent_config = 'FR' + elif '_ms' in version: + extent_config = 'MS' + else: + extent_config = 'FR' + if "_c" in version and version.split('_c')[1] == "": + calibrated = "yes" + else: + calibrated = "no" + + version_dir = os.path.join(versions_to_crawl, version) + magnitude_dir = os.path.join(version_dir, magnitude) + if os.path.exists(magnitude_dir): + magnitude_dir_list = os.listdir(magnitude_dir) + for f in magnitude_dir_list: + if '.json' in f and 'total_area' not in f: + nws_lid = f[:5] + sub_list_to_append = [version, nws_lid, magnitude, huc] + full_json_path = os.path.join(magnitude_dir, f) + flow = '' + if os.path.exists(full_json_path): + + # Get flow used to map. + flow_file = os.path.join(benchmark_test_case_dir, 'validation_data_' + benchmark_source, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') + if os.path.exists(flow_file): + with open(flow_file, newline='') as csv_file: + reader = csv.reader(csv_file) + next(reader) + for row in reader: + flow = row[1] + + stats_dict = json.load(open(full_json_path)) + for metric in metrics_to_write: + sub_list_to_append.append(stats_dict[metric]) + sub_list_to_append.append(full_json_path) + sub_list_to_append.append(flow) + sub_list_to_append.append(benchmark_source) + sub_list_to_append.append(extent_config) + sub_list_to_append.append(calibrated) + + list_to_write.append(sub_list_to_append) + except ValueError: + pass + + with open(master_metrics_csv_output, 'w', newline='') as csvfile: + csv_writer = csv.writer(csvfile) + csv_writer.writerows(list_to_write) + + +def process_alpha_test(args): + """ + This function is designed to be used in multiprocessing. It handles the calling of the run_alpha_test function. + + Args: + args (list): Formatted [fim_run_dir (str), version (str), test_id (str), magnitude (str), archive_results (bool), overwrite (bool)] + + """ + + + fim_run_dir = args[0] + version = args[1] + test_id = args[2] + magnitude = args[3] + archive_results = args[4] + overwrite = args[5] + + mask_type = 'huc' + + if archive_results == False: + compare_to_previous = True + else: + compare_to_previous = False + + try: + run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=compare_to_previous, archive_results=archive_results, mask_type=mask_type, overwrite=overwrite) + except Exception as e: + print(e) + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') + parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=True) + parser.add_argument('-v','--fim-version',help='Name of fim version to cache.',required=False, default="all") + parser.add_argument('-j','--job-number',help='Number of processes to use. 
Default is 1.',required=False, default="1") + parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="") + parser.add_argument('-b','--benchmark-category',help='A benchmark category to specify. Defaults to process all categories.',required=False, default="all") + parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, action="store_true") + parser.add_argument('-dc', '--dev-version-to-compare', nargs='+', help='Specify the name(s) of a dev (testing) version to include in master metrics CSV. Pass a space-delimited list.',required=False) + parser.add_argument('-m','--master-metrics-csv',help='Define path for master metrics CSV file.',required=True) + + # Assign variables from arguments. + args = vars(parser.parse_args()) + config = args['config'] + fim_version = args['fim_version'] + job_number = int(args['job_number']) + special_string = args['special_string'] + benchmark_category = args['benchmark_category'] + overwrite = args['overwrite'] + dev_versions_to_compare = args['dev_version_to_compare'] + master_metrics_csv = args['master_metrics_csv'] + + if overwrite: + if input("Are you sure you want to overwrite metrics? y/n: ") == "n": + quit + + # Default to processing all possible versions in PREVIOUS_FIM_DIR. Otherwise, process only the user-supplied version. + if fim_version != "all": + previous_fim_list = [fim_version] + else: + if config == 'PREV': + previous_fim_list = os.listdir(PREVIOUS_FIM_DIR) + elif config == 'DEV': + previous_fim_list = os.listdir(OUTPUTS_DIR) + + # Define whether or not to archive metrics in "official_versions" or "testing_versions" for each test_id. + if config == 'PREV': + archive_results = True + elif config == 'DEV': + archive_results = False + else: + print('Config (-c) option incorrectly set. Use "DEV" or "PREV"') + + # List all available benchmark categories and test_cases. + test_cases_dir_list = os.listdir(TEST_CASES_DIR) + benchmark_category_list = [] + if benchmark_category == "all": + for d in test_cases_dir_list: + if 'test_cases' in d: + benchmark_category_list.append(d.replace('_test_cases', '')) + else: + benchmark_category_list = [benchmark_category] + + # Loop through benchmark categories. + procs_list = [] + for bench_cat in benchmark_category_list: + + # Map path to appropriate test_cases folder and list test_ids into bench_cat_id_list. + bench_cat_test_case_dir = os.path.join(TEST_CASES_DIR, bench_cat + '_test_cases') + bench_cat_id_list = os.listdir(bench_cat_test_case_dir) + + # Loop through test_ids in bench_cat_id_list. + for test_id in bench_cat_id_list: + if 'validation' and 'other' not in test_id: + current_huc = test_id.split('_')[0] + if test_id.split('_')[1] in bench_cat: + # Loop through versions. + for version in previous_fim_list: + if config == 'DEV': + fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc) + elif config == 'PREV': + fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc) + + # For previous versions of HAND computed at HUC6 scale + if not os.path.exists(fim_run_dir): + print(fim_run_dir) + if config == 'DEV': + fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc[:6]) + elif config == 'PREV': + fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6]) + + if os.path.exists(fim_run_dir): + # If a user supplies a special_string (-s), then add it to the end of the created dirs. 
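+                            # Example of the effect (illustrative names): version 'fim_3_0_0' run with -s 'mycal' is recorded as 'fim_3_0_0_mycal'.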
+ if special_string != "": + version = version + '_' + special_string + + + # Define the magnitude lists to use, depending on test_id. + benchmark_type = test_id.split('_')[1] + magnitude = MAGNITUDE_DICT[benchmark_type] + + # Either add to list to multiprocess or process serially, depending on user specification. + if job_number > 1: + procs_list.append([fim_run_dir, version, test_id, magnitude, archive_results, overwrite]) + else: + process_alpha_test([fim_run_dir, version, test_id, magnitude, archive_results, overwrite]) + + # Multiprocess alpha test runs. + if job_number > 1: + with Pool(processes=job_number) as pool: + pool.map(process_alpha_test, procs_list) + + if config == 'DEV': + if dev_versions_to_compare != None: + dev_versions_to_include_list = dev_versions_to_compare + [version] + else: + dev_versions_to_include_list = [version] + if config == 'PREV': + dev_versions_to_include_list = [] + + # Do aggregate_metrics. + print("Creating master metrics CSV...") + create_master_metrics_csv(master_metrics_csv_output=master_metrics_csv, dev_versions_to_include_list=dev_versions_to_include_list) diff --git a/tools/thalweg_drop_check.py b/tools/thalweg_drop_check.py new file mode 100644 index 000000000..319c8bcd5 --- /dev/null +++ b/tools/thalweg_drop_check.py @@ -0,0 +1,383 @@ +#!/usr/bin/env python3 + +import os +import sys +import geopandas as gpd +sys.path.append('/foss_fim/src') +from shapely.geometry import Point, LineString +import rasterio +import numpy as np +import argparse +import matplotlib.pyplot as plt +import seaborn as sns +from collections import deque +from os.path import join +from multiprocessing import Pool +from utils.shared_functions import getDriver +from rasterio import features +from reachID_grid_to_vector_points import convert_grid_cells_to_points +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) + +""" + Plot Rating Curves and Compare to USGS Gages + + Parameters + ---------- + fim_dir : str + Directory containing FIM output folders. + output_dir : str + Stream layer to be evaluated. + stream_type : str + File name of USGS rating curves. + point_density : str + Elevation sampling density. + number_of_jobs : str + Number of jobs. 
+""" + + +def compare_thalweg(args): + + huc_dir = args[0] + stream_type = args[1] + point_density = args[2] + huc = args[3] + dem_meters_filename = args[4] + dem_lateral_thalweg_adj_filename = args[5] + dem_thalwegCond_filename = args[6] + profile_plots_filename = args[7] + profile_gpkg_filename = args[8] + profile_table_filename = args[9] + flows_grid_boolean_filename = args[10] + + if stream_type == 'derived': + + dem_derived_reaches_filename = os.path.join(huc_dir,'demDerived_reaches_split.gpkg') + streams = gpd.read_file(dem_derived_reaches_filename) + nhd_headwater_filename = os.path.join(huc_dir,'nhd_headwater_points_subset.gpkg') + wbd_filename = os.path.join(huc_dir,'wbd.gpkg') + wbd = gpd.read_file(wbd_filename) + headwaters_layer = gpd.read_file(nhd_headwater_filename,mask=wbd) + headwater_list = headwaters_layer.loc[headwaters_layer.pt_type == 'nws_lid'] + stream_id = 'HydroID' + + elif stream_type == 'burnline': + + nhd_reaches_filename = os.path.join(huc_dir,'NHDPlusBurnLineEvent_subset.gpkg') + nhd_reaches = gpd.read_file(nhd_reaches_filename) + streams = nhd_reaches.copy() + headwaters_layer = None + + # Get lists of all complete reaches using headwater attributes + headwater_list = streams.loc[streams.nws_lid!=''].nws_lid + stream_id = 'NHDPlusID' + + headwater_col = 'is_headwater' + streams[headwater_col] = False + headwater_list = headwater_list.reset_index(drop=True) + + if stream_type == 'derived': + streams['nws_lid'] = '' + + if streams.NextDownID.dtype != 'int': streams.NextDownID = streams.NextDownID.astype(int) + + min_dist = np.empty(len(headwater_list)) + streams['min_dist'] = 1000 + + for i, point in headwater_list.iterrows(): + streams['min_dist'] = [point.geometry.distance(line) for line in streams.geometry] + streams.loc[streams.min_dist==np.min(streams.min_dist),'nws_lid'] = point.site_id + + headwater_list = headwater_list.site_id + + streams.set_index(stream_id,inplace=True,drop=False) + + # Collect headwater streams + single_stream_paths = [] + dem_meters = rasterio.open(dem_meters_filename,'r') + index_option = 'reachID' + for index, headwater_site in enumerate(headwater_list): + stream_path = get_downstream_segments(streams.copy(),'nws_lid', headwater_site,'downstream',stream_id,stream_type) + stream_path = stream_path.reset_index(drop=True) + stream_path = stream_path.sort_values(by=['downstream_count']) + stream_path = stream_path.loc[stream_path.downstream==True] + if stream_type == 'burnline': + geom_value = [] + for index, segment in stream_path.iterrows(): + lineString = LineString(segment.geometry.coords[::-1]) + geom_value = geom_value + [(lineString, segment.downstream_count)] + nhd_reaches_raster = features.rasterize(shapes=geom_value , out_shape=[dem_meters.height, dem_meters.width],fill=dem_meters.nodata,transform=dem_meters.transform, all_touched=True, dtype=np.float32) + flow_bool = rasterio.open(flows_grid_boolean_filename) + flow_bool_data = flow_bool.read(1) + nhd_reaches_raster = np.where(flow_bool_data == int(0), -9999.0, (nhd_reaches_raster).astype(rasterio.float32)) + out_dem_filename = os.path.join(huc_dir,'NHDPlusBurnLineEvent_raster.tif') + with rasterio.open(out_dem_filename, "w", **dem_meters.profile, BIGTIFF='YES') as dest: + dest.write(nhd_reaches_raster, indexes = 1) + stream_path = convert_grid_cells_to_points(out_dem_filename,index_option) + stream_path["headwater_path"] = headwater_site + single_stream_paths = single_stream_paths + [stream_path] + print(f"length of {headwater_site} path: {len(stream_path)}") + + # 
Collect elevation values from multiple grids along each individual reach point + dem_lateral_thalweg_adj = rasterio.open(dem_lateral_thalweg_adj_filename,'r') + dem_thalwegCond = rasterio.open(dem_thalwegCond_filename,'r') + thalweg_points = gpd.GeoDataFrame() + for path in single_stream_paths: + split_points = [] + stream_ids = [] + dem_m_elev = [] + dem_burned_filled_elev = [] + dem_lat_thal_adj_elev = [] + dem_thal_adj_elev = [] + headwater_path = [] + index_count = [] + for index, segment in path.iterrows(): + if stream_type == 'derived': + linestring = segment.geometry + if point_density == 'midpoints': + midpoint = linestring.interpolate(0.5,normalized=True) + stream_ids = stream_ids + [segment[stream_id]] + split_points = split_points + [midpoint] + index_count = index_count + [segment.downstream_count] + dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(midpoint).coords), indexes=1))).item()] + dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(midpoint).coords), indexes=1))).item()] + dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(midpoint).coords), indexes=1))).item()] + headwater_path = headwater_path + [segment.headwater_path] + elif point_density == 'all_points': + count=0 + for point in zip(*linestring.coords.xy): + stream_ids = stream_ids + [segment[stream_id]] + split_points = split_points + [Point(point)] + count = count + 1 + index_count = index_count + [segment.downstream_count*1000 + count] + dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(point).coords), indexes=1))).item()] + dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(point).coords), indexes=1))).item()] + dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(point).coords), indexes=1))).item()] + headwater_path = headwater_path + [segment.headwater_path] + elif stream_type == 'burnline': + stream_ids = stream_ids + [segment['id']] + split_points = split_points + [Point(segment.geometry)] + index_count = index_count + [segment['id']] + dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(segment.geometry).coords), indexes=1))).item()] + dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(segment.geometry).coords), indexes=1))).item()] + dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(segment.geometry).coords), indexes=1))).item()] + headwater_path = headwater_path + [segment.headwater_path] + # gpd.GeoDataFrame({**data, "source": "dem_m"}) + dem_m_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'dem_m', 'elevation_m': dem_m_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry') + dem_lat_thal_adj_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'dem_lat_thal_adj', 'elevation_m': dem_lat_thal_adj_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry') + dem_thal_adj_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'thal_adj_dem', 'elevation_m': dem_thal_adj_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry') + for raster in [dem_m_pts,dem_lat_thal_adj_pts,dem_thal_adj_pts]: + raster = raster.sort_values(by=['index_count']) + 
raster.set_index('index_count',inplace=True,drop=True) + raster = raster.reset_index(drop=True) + raster.index.names = ['index_count'] + raster = raster.reset_index(drop=False) + thalweg_points = thalweg_points.append(raster,ignore_index = True) + del raster + del dem_m_pts,dem_lat_thal_adj_pts,dem_thal_adj_pts + + del dem_lateral_thalweg_adj,dem_thalwegCond,dem_meters + + try: + # Remove nodata_pts and convert elevation to ft + thalweg_points = thalweg_points.loc[thalweg_points.elevation_m > 0.0] + thalweg_points.elevation_m = np.round(thalweg_points.elevation_m,3) + thalweg_points['elevation_ft'] = np.round(thalweg_points.elevation_m*3.28084,3) + + # Plot thalweg profile + plot_profile(thalweg_points, profile_plots_filename) + + # Filter final thalweg ajdusted layer + thal_adj_points = thalweg_points.loc[thalweg_points.source=='thal_adj_dem'].copy() + # thal_adj_points.to_file(profile_gpkg_filename,driver=getDriver(profile_gpkg_filename)) + + # Identify significant rises/drops in elevation + thal_adj_points['elev_change'] = thal_adj_points.groupby(['headwater_path', 'source'])['elevation_m'].apply(lambda x: x - x.shift()) + elev_changes = thal_adj_points.loc[(thal_adj_points.elev_change<=-lateral_elevation_threshold) | (thal_adj_points.elev_change>0.0)] + + if not elev_changes.empty: + # elev_changes.to_csv(profile_table_filename,index=False) + elev_changes.to_file(profile_gpkg_filename,index=False,driver=getDriver(profile_gpkg_filename)) + + + # Zoom in to plot only areas with steep elevation changes + # select_streams = elev_changes.stream_id.to_list() + # downstream_segments = [index + 1 for index in select_streams] + # upstream_segments = [index - 1 for index in select_streams] + # select_streams = list(set(upstream_segments + downstream_segments + select_streams)) + # thal_adj_points_select = thal_adj_points.loc[thal_adj_points.stream_id.isin(select_streams)] + # plot_profile(thal_adj_points_select, profile_plots_filename_zoom) + + except: + print(f"huc {huc} has {len(thalweg_points)} thalweg points") + +def get_downstream_segments(streams, headwater_col,headwater_id,flag_column,stream_id,stream_type): + + streams[flag_column] = False + streams['downstream_count'] = -9 + streams.loc[streams[headwater_col]==headwater_id,flag_column] = True + streams.loc[streams[headwater_col]==headwater_id,'downstream_count'] = 0 + count = 0 + + Q = deque(streams.loc[streams[headwater_col]==headwater_id,stream_id].tolist()) + visited = set() + + while Q: + q = Q.popleft() + + if q in visited: + continue + + visited.add(q) + + count = count + 1 + if stream_type == 'burnline': + + toNode,DnLevelPat = streams.loc[q,['ToNode','DnLevelPat']] + downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() + + # If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. 
exclude segments that are diversions) + if len(set(downstream_ids)) > 1: # special case: remove duplicate NHDPlusIDs + + relevant_ids = [segment for segment in downstream_ids if DnLevelPat == streams.loc[segment,'LevelPathI']] + + else: + + relevant_ids = downstream_ids + + elif stream_type == 'derived': + + toNode = streams.loc[q,['NextDownID']].item() + relevant_ids = streams.loc[streams[stream_id] == toNode,:].index.tolist() + + streams.loc[relevant_ids,flag_column] = True + streams.loc[relevant_ids,'downstream_count'] = count + + for i in relevant_ids: + + if i not in visited: + Q.append(i) + + streams = streams.loc[streams[flag_column],:] + + return streams + + +def plot_profile(elevation_table,profile_plots_filename): + num_plots = len(elevation_table.headwater_path.unique()) + unique_rasters = elevation_table.source.unique() + if num_plots > 3: + columns = int(np.ceil(num_plots / 3)) + else: + columns = 1 + # palette = dict(zip(unique_rasters, sns.color_palette(n_colors=len(unique_rasters)))) + # palette.update({'dem_m':'gray'}) + sns.set(style="ticks") + if len(unique_rasters) > 1: + g = sns.FacetGrid(elevation_table, col="headwater_path", hue="source", hue_order=['dem_m', 'dem_lat_thal_adj', 'thal_adj_dem'], sharex=False, sharey=False,col_wrap=columns) + else: + g = sns.FacetGrid(elevation_table, col="headwater_path", hue="source", sharex=False, sharey=False,col_wrap=columns) + g.map(sns.lineplot, "index_count", "elevation_ft", palette="tab20c") + g.set_axis_labels(x_var="Longitudinal Profile (index)", y_var="Elevation (ft)") + # Iterate through each axis to get individual y-axis bounds + for ax in g.axes.flat: + mins = [] + maxes = [] + for line in ax.lines: + mins = mins + [min(line.get_ydata())] + maxes = maxes + [max(line.get_ydata())] + min_y = min(mins) - (max(maxes) - min(mins))/10 + max_y = max(maxes) + (max(maxes) - min(mins))/10 + ax.set_ylim(min_y,max_y) + # if len(unique_rasters) > 1: + # ax.lines[0].set_linestyle("--") + # ax.lines[0].set_color('gray') + # box = ax.get_position() + # ax.set_position([box.x0, box.y0 + box.height * 0.1,box.width, box.height * 0.9]) + # Adjust the arrangement of the plots + # g.fig.tight_layout(w_pad=5) #w_pad=2 + g.add_legend() + # plt.legend(bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0) + plt.subplots_adjust(bottom=0.25) + plt.savefig(profile_plots_filename) + plt.close() + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='generate thalweg elevation profile plots and tables to identify significant drops along FIM stream networks') + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) + parser.add_argument('-output_dir','--output-dir', help='thalweg profile output folder (plots and spatial layers)', required=True,type=str) + # parser.add_argument('-rasters','--raster-list',help='list of rasters to be evaluated',required=True,type=str) + parser.add_argument('-stream_type','--stream-type',help='stream layer to be evaluated',required=True,type=str,choices=['derived','burnline']) + parser.add_argument('-point_density','--point-density',help='elevation sampling density',required=True,type=str,choices=['midpoints','all_points']) + parser.add_argument('-th','--elevation_threshold',help='significant elevation drop threshold in meters.',required=True) + parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) + + args = vars(parser.parse_args()) + + fim_dir = args['fim_dir'] + output_dir = args['output_dir'] + # raster_list = args['raster_list'] + stream_type = args['stream_type'] +
point_density = args['point_density'] + number_of_jobs = args['number_of_jobs'] + + # dem_meters_dir = os.environ.get('dem_meters') + + plots_dir = join(output_dir,'plots') + os.makedirs(plots_dir, exist_ok=True) + spatial_dir = os.path.join(output_dir,'spatial_layers') + os.makedirs(spatial_dir, exist_ok=True) + + # Open log file + sys.__stdout__ = sys.stdout + log_file = open(join(output_dir,'thalweg_profile_comparison.log'),"w") + sys.stdout = log_file + + procs_list = [] + huc_list = os.listdir(fim_dir) + for huc in huc_list: + if huc != 'logs': + + huc_dir = os.path.join(fim_dir,huc) + flows_grid_boolean_filename = os.path.join(huc_dir,'flows_grid_boolean.tif') + dem_meters_filename = os.path.join(huc_dir,'dem_meters.tif') + dem_lateral_thalweg_adj_filename = os.path.join(huc_dir,'dem_lateral_thalweg_adj.tif') + dem_thalwegCond_filename = os.path.join(huc_dir,'dem_thalwegCond.tif') + profile_plots_filename = os.path.join(plots_dir,f"profile_drop_plots_{huc}_{point_density}_{stream_type}.png") + profile_gpkg_filename = os.path.join(spatial_dir,f"thalweg_elevation_changes_{huc}_{point_density}_{stream_type}.gpkg") + profile_table_filename = os.path.join(spatial_dir,f"thalweg_elevation_changes_{huc}_{point_density}_{stream_type}.csv") + + procs_list.append([huc_dir,stream_type,point_density,huc,dem_meters_filename,dem_lateral_thalweg_adj_filename,dem_thalwegCond_filename,profile_plots_filename,profile_gpkg_filename,profile_table_filename,flows_grid_boolean_filename]) + + # Initiate multiprocessing + print(f"Generating thalweg elevation profiles for {len(procs_list)} hucs using {number_of_jobs} jobs") + with Pool(processes=number_of_jobs) as pool: + # Get elevation values along thalweg for each headwater stream path + pool.map(compare_thalweg, procs_list) + + # Append all elevation change spatial layers to a single gpkg + spatial_list = os.listdir(spatial_dir) + agg_thalweg_elevations_gpkg_fileName = os.path.join(output_dir, f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.gpkg") + agg_thalweg_elevation_table_fileName = os.path.join(output_dir, f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.csv") + for layer in spatial_list: + + huc_gpd = gpd.read_file(os.path.join(spatial_dir,layer)) + + # Write aggregate layer + if os.path.isfile(agg_thalweg_elevations_gpkg_fileName): + huc_gpd.to_file(agg_thalweg_elevations_gpkg_fileName,driver=getDriver(agg_thalweg_elevations_gpkg_fileName),index=False, mode='a') + else: + huc_gpd.to_file(agg_thalweg_elevations_gpkg_fileName,driver=getDriver(agg_thalweg_elevations_gpkg_fileName),index=False) + + del huc_gpd + + # Create csv of elevation table + huc_table = gpd.read_file(agg_thalweg_elevations_gpkg_fileName) + huc_table.to_csv(agg_thalweg_elevation_table_fileName,index=False) + + # Close log file + sys.stdout = sys.__stdout__ + log_file.close() diff --git a/tools/tools_shared_functions.py b/tools/tools_shared_functions.py new file mode 100755 index 000000000..6cf0e7685 --- /dev/null +++ b/tools/tools_shared_functions.py @@ -0,0 +1,1450 @@ +#!/usr/bin/env python3 + +import os +import json +import rasterio +import pandas as pd +import geopandas as gpd +import requests +import numpy as np +import pathlib +from pathlib import Path +import rasterio.shutil +from rasterio.warp import calculate_default_transform, reproject, Resampling +import rasterio.crs +from rasterio import features +from shapely.geometry import shape +from shapely.geometry import Polygon +from shapely.geometry import MultiPolygon + + +def 
check_for_regression(stats_json_to_test, previous_version, previous_version_stats_json_path, regression_test_csv=None): + + difference_dict = {} + + # Compare stats_csv to previous_version_stats_file + stats_dict_to_test = json.load(open(stats_json_to_test)) + previous_version_stats_dict = json.load(open(previous_version_stats_json_path)) + + for stat, value in stats_dict_to_test.items(): + previous_version_value = previous_version_stats_dict[stat] + stat_value_diff = value - previous_version_value + difference_dict.update({stat + '_diff': stat_value_diff}) + + return difference_dict + + +def compute_contingency_stats_from_rasters(predicted_raster_path, benchmark_raster_path, agreement_raster=None, stats_csv=None, stats_json=None, mask_values=None, stats_modes_list=['total_area'], test_id='', mask_dict={}): + """ + This function contains FIM-specific logic to prepare raster datasets for use in the generic get_contingency_table_from_binary_rasters() function. + This function also calls the generic compute_stats_from_contingency_table() function and writes the results to CSV and/or JSON, depending on user input. + + Args: + predicted_raster_path (str): The path to the predicted, or modeled, FIM extent raster. + benchmark_raster_path (str): The path to the benchmark, or truth, FIM extent raster. + agreement_raster (str): Optional. An agreement raster will be written to this path. 0: True Negatives, 1: False Negative, 2: False Positive, 3: True Positive. + stats_csv (str): Optional. Performance statistics will be written to this path. CSV allows for readability and other tabular processes. + stats_json (str): Optional. Performance statistics will be written to this path. JSON allows for quick ingestion into Python dictionary in other processes. + + Returns: + stats_dictionary (dict): A dictionary of statistics produced by compute_stats_from_contingency_table(). Statistic names are keys and statistic values are the values. + """ + + # Get cell size of benchmark raster. + raster = rasterio.open(predicted_raster_path) + t = raster.transform + cell_x = t[0] + cell_y = t[4] + cell_area = abs(cell_x*cell_y) + + # Get contingency table from two rasters. + contingency_table_dictionary = get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_raster_path, agreement_raster, mask_values=mask_values, mask_dict=mask_dict) + + stats_dictionary = {} + + for stats_mode in contingency_table_dictionary: + true_negatives = contingency_table_dictionary[stats_mode]['true_negatives'] + false_negatives = contingency_table_dictionary[stats_mode]['false_negatives'] + false_positives = contingency_table_dictionary[stats_mode]['false_positives'] + true_positives = contingency_table_dictionary[stats_mode]['true_positives'] + masked_count = contingency_table_dictionary[stats_mode]['masked_count'] + file_handle = contingency_table_dictionary[stats_mode]['file_handle'] + + # Produce statistics from continency table and assign to dictionary. cell_area argument optional (defaults to None). + mode_stats_dictionary = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area, masked_count) + + # Write the mode_stats_dictionary to the stats_csv. + if stats_csv != None: + stats_csv = os.path.join(os.path.split(stats_csv)[0], file_handle + '_stats.csv') + df = pd.DataFrame.from_dict(mode_stats_dictionary, orient="index", columns=['value']) + df.to_csv(stats_csv) + + # Write the mode_stats_dictionary to the stats_json. 
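+        # For example, the default 'total_area' stats mode writes total_area_stats.csv and
+        # total_area_stats.json alongside the agreement raster (illustrative of the file_handle naming).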
+ if stats_json != None: + stats_json = os.path.join(os.path.split(stats_csv)[0], file_handle + '_stats.json') + with open(stats_json, "w") as outfile: + json.dump(mode_stats_dictionary, outfile) + + stats_dictionary.update({stats_mode: mode_stats_dictionary}) + + return stats_dictionary + + +def profile_test_case_archive(archive_to_check, magnitude, stats_mode): + """ + This function searches multiple directories and locates previously produced performance statistics. + + Args: + archive_to_check (str): The directory path to search. + magnitude (str): Because a benchmark dataset may have multiple magnitudes, this argument defines + which magnitude is to be used when searching for previous statistics. + Returns: + archive_dictionary (dict): A dictionary of available statistics for previous versions of the domain and magnitude. + {version: {agreement_raster: agreement_raster_path, stats_csv: stats_csv_path, stats_json: stats_json_path}} + *Will only add the paths to files that exist. + + """ + + archive_dictionary = {} + + # List through previous version and check for available stats and maps. If available, add to dictionary. + available_versions_list = os.listdir(archive_to_check) + + if len(available_versions_list) == 0: + print("Cannot compare with -c flag because there are no data in the previous_versions directory.") + return + + for version in available_versions_list: + version_magnitude_dir = os.path.join(archive_to_check, version, magnitude) + stats_json = os.path.join(version_magnitude_dir, stats_mode + '_stats.json') + + if os.path.exists(stats_json): + archive_dictionary.update({version: {'stats_json': stats_json}}) + + return archive_dictionary + + +def compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area=None, masked_count=None): + """ + This generic function takes contingency table metrics as arguments and returns a dictionary of contingency table statistics. + Much of the calculations below were taken from older Python files. This is evident in the inconsistent use of case. + + Args: + true_negatives (int): The true negatives from a contingency table. + false_negatives (int): The false negatives from a contingency table. + false_positives (int): The false positives from a contingency table. + true_positives (int): The true positives from a contingency table. + cell_area (float or None): This optional argument allows for area-based statistics to be calculated, in the case that + contingency table metrics were derived from areal analysis. + + Returns: + stats_dictionary (dict): A dictionary of statistics. Statistic names are keys and statistic values are the values. + Refer to dictionary definition in bottom of function for statistic names. + + """ + + import numpy as np + + total_population = true_negatives + false_negatives + false_positives + true_positives + + # Basic stats. +# Percent_correct = ((true_positives + true_negatives) / total_population) * 100 +# pod = true_positives / (true_positives + false_negatives) + + try: + FAR = false_positives / (true_positives + false_positives) + except ZeroDivisionError: + FAR = "NA" + + try: + CSI = true_positives / (true_positives + false_positives + false_negatives) + except ZeroDivisionError: + CSI = "NA" + + try: + BIAS = (true_positives + false_positives) / (true_positives + false_negatives) + except ZeroDivisionError: + BIAS = "NA" + + # Compute equitable threat score (ETS) / Gilbert Score. 
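+    # Worked example with made-up counts: TP=80, FP=20, FN=10, TN=890 (total_population=1000):
+    #   a_ref = ((80+20)*(80+10)) / 1000 = 9
+    #   ETS   = (80-9) / (80-9+20+10) = 71/101 ~= 0.70
+    # (For the same counts, CSI above = 80/110 ~= 0.73 and FAR = 20/100 = 0.20.)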
+ try: + a_ref = ((true_positives + false_positives)*(true_positives + false_negatives)) / total_population + EQUITABLE_THREAT_SCORE = (true_positives - a_ref) / (true_positives - a_ref + false_positives + false_negatives) + except ZeroDivisionError: + EQUITABLE_THREAT_SCORE = "NA" + + if total_population == 0: + TP_perc, FP_perc, TN_perc, FN_perc = "NA", "NA", "NA", "NA" + else: + TP_perc = (true_positives / total_population) * 100 + FP_perc = (false_positives / total_population) * 100 + TN_perc = (true_negatives / total_population) * 100 + FN_perc = (false_negatives / total_population) * 100 + + predPositive = true_positives + false_positives + predNegative = true_negatives + false_negatives + obsPositive = true_positives + false_negatives + obsNegative = true_negatives + false_positives + + TP = float(true_positives) + TN = float(true_negatives) + FN = float(false_negatives) + FP = float(false_positives) + try: + MCC = (TP*TN - FP*FN)/ np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)) + except ZeroDivisionError: + MCC = "NA" + + if masked_count != None: + total_pop_and_mask_pop = total_population + masked_count + if total_pop_and_mask_pop == 0: + masked_perc = "NA" + else: + masked_perc = (masked_count / total_pop_and_mask_pop) * 100 + else: + masked_perc = None + + # This checks if a cell_area has been provided, thus making areal calculations possible. + sq_km_converter = 1000000 + + if cell_area != None: + TP_area = (true_positives * cell_area) / sq_km_converter + FP_area = (false_positives * cell_area) / sq_km_converter + TN_area = (true_negatives * cell_area) / sq_km_converter + FN_area = (false_negatives * cell_area) / sq_km_converter + area = (total_population * cell_area) / sq_km_converter + + predPositive_area = (predPositive * cell_area) / sq_km_converter + predNegative_area = (predNegative * cell_area) / sq_km_converter + obsPositive_area = (obsPositive * cell_area) / sq_km_converter + obsNegative_area = (obsNegative * cell_area) / sq_km_converter + positiveDiff_area = predPositive_area - obsPositive_area + + if masked_count != None: + masked_area = (masked_count * cell_area) / sq_km_converter + else: + masked_area = None + + # If no cell_area is provided, then the contingeny tables are likely not derived from areal analysis. 
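+    # (For reference, when cell_area is provided the conversion above works out as follows: a 10 m
+    # raster gives cell_area = 100 m^2, so 1,000,000 true-positive cells -> 1e8 m^2 / 1e6 = 100 km^2.
+    # Illustrative numbers only.)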
+ else: + TP_area = None + FP_area = None + TN_area = None + FN_area = None + area = None + + predPositive_area = None + predNegative_area = None + obsPositive_area = None + obsNegative_area = None + positiveDiff_area = None + MCC = None + + if total_population == 0: + predPositive_perc, predNegative_perc, obsPositive_perc, obsNegative_perc , positiveDiff_perc = "NA", "NA", "NA", "NA", "NA" + else: + predPositive_perc = (predPositive / total_population) * 100 + predNegative_perc = (predNegative / total_population) * 100 + obsPositive_perc = (obsPositive / total_population) * 100 + obsNegative_perc = (obsNegative / total_population) * 100 + + positiveDiff_perc = predPositive_perc - obsPositive_perc + + if total_population == 0: + prevalence = "NA" + else: + prevalence = (true_positives + false_negatives) / total_population + + try: + PPV = true_positives / predPositive + except ZeroDivisionError: + PPV = "NA" + + try: + NPV = true_negatives / predNegative + except ZeroDivisionError: + NPV = "NA" + + try: + TNR = true_negatives / obsNegative + except ZeroDivisionError: + TNR = "NA" + + try: + TPR = true_positives / obsPositive + + except ZeroDivisionError: + TPR = "NA" + + try: + Bal_ACC = np.mean([TPR,TNR]) + except TypeError: + Bal_ACC = "NA" + + if total_population == 0: + ACC = "NA" + else: + ACC = (true_positives + true_negatives) / total_population + + try: + F1_score = (2*true_positives) / (2*true_positives + false_positives + false_negatives) + except ZeroDivisionError: + F1_score = "NA" + + stats_dictionary = {'true_negatives_count': int(true_negatives), + 'false_negatives_count': int(false_negatives), + 'true_positives_count': int(true_positives), + 'false_positives_count': int(false_positives), + 'contingency_tot_count': int(total_population), + 'cell_area_m2': cell_area, + + 'TP_area_km2': TP_area, + 'FP_area_km2': FP_area, + 'TN_area_km2': TN_area, + 'FN_area_km2': FN_area, + + 'contingency_tot_area_km2': area, + 'predPositive_area_km2': predPositive_area, + 'predNegative_area_km2': predNegative_area, + 'obsPositive_area_km2': obsPositive_area, + 'obsNegative_area_km2': obsNegative_area, + 'positiveDiff_area_km2': positiveDiff_area, + + 'CSI': CSI, + 'FAR': FAR, + 'TPR': TPR, + 'TNR': TNR, + + 'PPV': PPV, + 'NPV': NPV, + 'ACC': ACC, + 'Bal_ACC': Bal_ACC, + 'MCC': MCC, + 'EQUITABLE_THREAT_SCORE': EQUITABLE_THREAT_SCORE, + 'PREVALENCE': prevalence, + 'BIAS': BIAS, + 'F1_SCORE': F1_score, + + 'TP_perc': TP_perc, + 'FP_perc': FP_perc, + 'TN_perc': TN_perc, + 'FN_perc': FN_perc, + 'predPositive_perc': predPositive_perc, + 'predNegative_perc': predNegative_perc, + 'obsPositive_perc': obsPositive_perc, + 'obsNegative_perc': obsNegative_perc, + 'positiveDiff_perc': positiveDiff_perc, + + 'masked_count': int(masked_count), + 'masked_perc': masked_perc, + 'masked_area_km2': masked_area, + + } + + return stats_dictionary + + +def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_raster_path, agreement_raster=None, mask_values=None, mask_dict={}): + """ + Produces contingency table from 2 rasters and returns it. Also exports an agreement raster classified as: + 0: True Negatives + 1: False Negative + 2: False Positive + 3: True Positive + + Args: + benchmark_raster_path (str): Path to the binary benchmark raster. 0 = phenomena not present, 1 = phenomena present, NoData = NoData. + predicted_raster_path (str): Path to the predicted raster. 0 = phenomena not present, 1 = phenomena present, NoData = NoData. 
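+        mask_dict (dict): Optional. Polygon layers used to mask the agreement raster, keyed by layer name.
+            Each entry carries 'path', 'buffer' (meters or None), and 'operation' ('exclude' or 'include'),
+            e.g. {'levees': {'path': '/data/<levee_polygons>.gpkg', 'buffer': None, 'operation': 'exclude'}} (illustrative path).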
+ + Returns: + contingency_table_dictionary (dict): A Python dictionary of a contingency table. Key/value pair formatted as: + {true_negatives: int, false_negatives: int, false_positives: int, true_positives: int} + + """ + from rasterio.warp import reproject, Resampling + import rasterio + import numpy as np + import os + import rasterio.mask + import geopandas as gpd + from shapely.geometry import box + +# print("-----> Evaluating performance across the total area...") + # Load rasters. + benchmark_src = rasterio.open(benchmark_raster_path) + predicted_src = rasterio.open(predicted_raster_path) + predicted_array = predicted_src.read(1) + + benchmark_array_original = benchmark_src.read(1) + + if benchmark_array_original.shape != predicted_array.shape: + benchmark_array = np.empty(predicted_array.shape, dtype=np.int8) + + reproject(benchmark_array_original, + destination = benchmark_array, + src_transform = benchmark_src.transform, + src_crs = benchmark_src.crs, + src_nodata = benchmark_src.nodata, + dst_transform = predicted_src.transform, + dst_crs = predicted_src.crs, + dst_nodata = benchmark_src.nodata, + dst_resolution = predicted_src.res, + resampling = Resampling.nearest) + + predicted_array_raw = predicted_src.read(1) + + # Align the benchmark domain to the modeled domain. + benchmark_array = np.where(predicted_array==predicted_src.nodata, 10, benchmark_array) + + # Ensure zeros and ones for binary comparison. Assume that positive values mean flooding and 0 or negative values mean dry. + predicted_array = np.where(predicted_array==predicted_src.nodata, 10, predicted_array) # Reclassify NoData to 10 + predicted_array = np.where(predicted_array<0, 0, predicted_array) + predicted_array = np.where(predicted_array>0, 1, predicted_array) + + benchmark_array = np.where(benchmark_array==benchmark_src.nodata, 10, benchmark_array) # Reclassify NoData to 10 + + agreement_array = np.add(benchmark_array, 2*predicted_array) + agreement_array = np.where(agreement_array>4, 10, agreement_array) + + del benchmark_src, benchmark_array, predicted_array, predicted_array_raw + + # Loop through exclusion masks and mask the agreement_array. + if mask_dict != {}: + for poly_layer in mask_dict: + + operation = mask_dict[poly_layer]['operation'] + + if operation == 'exclude': + + poly_path = mask_dict[poly_layer]['path'] + buffer_val = mask_dict[poly_layer]['buffer'] + + reference = predicted_src + + bounding_box = gpd.GeoDataFrame({'geometry': box(*reference.bounds)}, index=[0], crs=reference.crs) + #Read layer using the bbox option. CRS mismatches are handled if bbox is passed a geodataframe (which it is). + poly_all = gpd.read_file(poly_path, bbox = bounding_box) + + # Make sure features are present in bounding box area before projecting. Continue to next layer if features are absent. + if poly_all.empty: + continue + +# print("-----> Masking at " + poly_layer + "...") + #Project layer to reference crs. + poly_all_proj = poly_all.to_crs(reference.crs) + # check if there are any lakes within our reference raster extent. + if poly_all_proj.empty: + #If no features within reference raster extent, create a zero array of same shape as reference raster. + poly_mask = np.zeros(reference.shape) + else: + #Check if a buffer value is passed to function. + if buffer_val is None: + #If features are present and no buffer is passed, assign geometry to variable. + geometry = poly_all_proj.geometry + else: + #If features are present and a buffer is passed, assign buffered geometry to variable. 
+ geometry = poly_all_proj.buffer(buffer_val) + + #Perform mask operation on the reference raster and using the previously declared geometry geoseries. Invert set to true as we want areas outside of poly areas to be False and areas inside poly areas to be True. + in_poly,transform,c = rasterio.mask.raster_geometry_mask(reference, geometry, invert = True) + #Write mask array, areas inside polys are set to 1 and areas outside poly are set to 0. + poly_mask = np.where(in_poly == True, 1,0) + + # Perform mask. + masked_agreement_array = np.where(poly_mask == 1, 4, agreement_array) + + # Get rid of masked values outside of the modeled domain. + agreement_array = np.where(agreement_array == 10, 10, masked_agreement_array) + + contingency_table_dictionary = {} # Initialize empty dictionary. + + # Only write the agreement raster if user-specified. + if agreement_raster != None: + with rasterio.Env(): + profile = predicted_src.profile + profile.update(nodata=10) + with rasterio.open(agreement_raster, 'w', **profile) as dst: + dst.write(agreement_array, 1) + + # Write legend text file + legend_txt = os.path.join(os.path.split(agreement_raster)[0], 'read_me.txt') + + from datetime import datetime + + now = datetime.now() + current_time = now.strftime("%m/%d/%Y %H:%M:%S") + + with open(legend_txt, 'w') as f: + f.write("%s\n" % '0: True Negative') + f.write("%s\n" % '1: False Negative') + f.write("%s\n" % '2: False Positive') + f.write("%s\n" % '3: True Positive') + f.write("%s\n" % '4: Masked area (excluded from contingency table analysis). Mask layers: {mask_dict}'.format(mask_dict=mask_dict)) + f.write("%s\n" % 'Results produced at: {current_time}'.format(current_time=current_time)) + + # Store summed pixel counts in dictionary. + contingency_table_dictionary.update({'total_area':{'true_negatives': int((agreement_array == 0).sum()), + 'false_negatives': int((agreement_array == 1).sum()), + 'false_positives': int((agreement_array == 2).sum()), + 'true_positives': int((agreement_array == 3).sum()), + 'masked_count': int((agreement_array == 4).sum()), + 'file_handle': 'total_area' + + }}) + + # After agreement_array is masked with default mask layers, check for inclusion masks in mask_dict. + if mask_dict != {}: + for poly_layer in mask_dict: + + operation = mask_dict[poly_layer]['operation'] + + if operation == 'include': + poly_path = mask_dict[poly_layer]['path'] + buffer_val = mask_dict[poly_layer]['buffer'] + + reference = predicted_src + + bounding_box = gpd.GeoDataFrame({'geometry': box(*reference.bounds)}, index=[0], crs=reference.crs) + #Read layer using the bbox option. CRS mismatches are handled if bbox is passed a geodataframe (which it is). + poly_all = gpd.read_file(poly_path, bbox = bounding_box) + + # Make sure features are present in bounding box area before projecting. Continue to next layer if features are absent. + if poly_all.empty: + continue + +# print("-----> Evaluating performance at " + poly_layer + "...") + #Project layer to reference crs. + poly_all_proj = poly_all.to_crs(reference.crs) + # check if there are any lakes within our reference raster extent. + if poly_all_proj.empty: + #If no features within reference raster extent, create a zero array of same shape as reference raster. + poly_mask = np.zeros(reference.shape) + else: + #Check if a buffer value is passed to function. + if buffer_val is None: + #If features are present and no buffer is passed, assign geometry to variable. 
+ geometry = poly_all_proj.geometry + else: + #If features are present and a buffer is passed, assign buffered geometry to variable. + geometry = poly_all_proj.buffer(buffer_val) + + #Perform mask operation on the reference raster and using the previously declared geometry geoseries. Invert set to true as we want areas outside of poly areas to be False and areas inside poly areas to be True. + in_poly,transform,c = rasterio.mask.raster_geometry_mask(reference, geometry, invert = True) + #Write mask array, areas inside polys are set to 1 and areas outside poly are set to 0. + poly_mask = np.where(in_poly == True, 1, 0) + + # Perform mask. + masked_agreement_array = np.where(poly_mask == 0, 4, agreement_array) # Changed to poly_mask == 0 + + # Get rid of masked values outside of the modeled domain. + temp_agreement_array = np.where(agreement_array == 10, 10, masked_agreement_array) + + if buffer_val == None: # The buffer used is added to filename, and 0 is easier to read than None. + buffer_val = 0 + + poly_handle = poly_layer + '_b' + str(buffer_val) + 'm' + + # Write the layer_agreement_raster. + layer_agreement_raster = os.path.join(os.path.split(agreement_raster)[0], poly_handle + '_agreement.tif') + with rasterio.Env(): + profile = predicted_src.profile + profile.update(nodata=10) + with rasterio.open(layer_agreement_raster, 'w', **profile) as dst: + dst.write(temp_agreement_array, 1) + + + # Store summed pixel counts in dictionary. + contingency_table_dictionary.update({poly_handle:{'true_negatives': int((temp_agreement_array == 0).sum()), + 'false_negatives': int((temp_agreement_array == 1).sum()), + 'false_positives': int((temp_agreement_array == 2).sum()), + 'true_positives': int((temp_agreement_array == 3).sum()), + 'masked_count': int((temp_agreement_array == 4).sum()), + 'file_handle': poly_handle + }}) + + return contingency_table_dictionary +######################################################################## +######################################################################## +#Functions related to categorical fim and ahps evaluation +######################################################################## +def get_metadata(metadata_url, select_by, selector, must_include = None, upstream_trace_distance = None, downstream_trace_distance = None ): + ''' + Retrieve metadata for a site or list of sites. + + Parameters + ---------- + metadata_url : STR + metadata base URL. + select_by : STR + Location search option. + selector : LIST + Value to match location data against. Supplied as a LIST. + must_include : STR, optional + What attributes are required to be valid response. The default is None. + upstream_trace_distance : INT, optional + Distance in miles upstream of site to trace NWM network. The default is None. + downstream_trace_distance : INT, optional + Distance in miles downstream of site to trace NWM network. The default is None. + + Returns + ------- + metadata_list : LIST + Dictionary or list of dictionaries containing metadata at each site. + metadata_dataframe : Pandas DataFrame + Dataframe of metadata for each site. 
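+
+    Example
+    -------
+    Illustrative call (the metadata URL is a placeholder for the WRDS metadata endpoint):
+        metadata_list, metadata_df = get_metadata('<metadata_url>',
+                                                  select_by='nws_lid', selector=['all'],
+                                                  must_include='nws_data.rfc_forecast_point',
+                                                  downstream_trace_distance='all')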
+ + ''' + + #Format selector variable in case multiple selectors supplied + format_selector = '%2C'.join(selector) + #Define the url + url = f'{metadata_url}/{select_by}/{format_selector}/' + #Assign optional parameters to a dictionary + params = {} + params['must_include'] = must_include + params['upstream_trace_distance'] = upstream_trace_distance + params['downstream_trace_distance'] = downstream_trace_distance + #Request data from url + response = requests.get(url, params = params) + if response.ok: + #Convert data response to a json + metadata_json = response.json() + #Get the count of returned records + location_count = metadata_json['_metrics']['location_count'] + #Get metadata + metadata_list = metadata_json['locations'] + #Add timestamp of WRDS retrieval + timestamp = response.headers['Date'] + #Add timestamp of sources retrieval + timestamp_list = metadata_json['data_sources']['metadata_sources'] + + # Default timestamps to "Not available" and overwrite with real values if possible. + nwis_timestamp, nrldb_timestamp = "Not available", "Not available" + for timestamp in timestamp_list: + if "NWIS" in timestamp: + nwis_timestamp = timestamp + if "NRLDB" in timestamp: + nrldb_timestamp = timestamp + +# nrldb_timestamp, nwis_timestamp = metadata_json['data_sources']['metadata_sources'] + #get crosswalk info (always last dictionary in list) + crosswalk_info = metadata_json['data_sources'] + #Update each dictionary with timestamp and crosswalk info also save to DataFrame. + for metadata in metadata_list: + metadata.update({"wrds_timestamp": timestamp}) + metadata.update({"nrldb_timestamp":nrldb_timestamp}) + metadata.update({"nwis_timestamp":nwis_timestamp}) + metadata.update(crosswalk_info) + metadata_dataframe = pd.json_normalize(metadata_list) + #Replace all periods with underscores in column names + metadata_dataframe.columns = metadata_dataframe.columns.str.replace('.','_') + else: + #if request was not succesful, print error message. + print(f'Code: {response.status_code}\nMessage: {response.reason}\nURL: {response.url}') + #Return empty outputs + metadata_list = [] + metadata_dataframe = pd.DataFrame() + return metadata_list, metadata_dataframe + +######################################################################## +#Function to assign HUC code using the WBD spatial layer using a spatial join +######################################################################## +def aggregate_wbd_hucs(metadata_list, wbd_huc8_path, retain_attributes = False): + ''' + Assigns the proper FIM HUC 08 code to each site in the input DataFrame. + Converts input DataFrame to a GeoDataFrame using lat/lon attributes + with sites containing null nws_lid/lat/lon removed. Reprojects GeoDataFrame + to same CRS as the HUC 08 layer. Performs a spatial join to assign the + HUC 08 layer to the GeoDataFrame. Sites that are not assigned a HUC + code removed as well as sites in Alaska and Canada. + + Parameters + ---------- + metadata_list: List of Dictionaries + Output list from get_metadata + wbd_huc8_path : pathlib Path + Path to HUC8 wbd layer (assumed to be geopackage format) + retain_attributes ; Bool OR List + Flag to define attributes of output GeoDataBase. If True, retain + all attributes. If False, the site metadata will be trimmed to a + default list. If a list of desired attributes is supplied these + will serve as the retained attributes. + Returns + ------- + dictionary : DICT + Dictionary with HUC (key) and corresponding AHPS codes (values). 
+ all_gdf: GeoDataFrame + GeoDataFrame of all NWS_LID sites. + + ''' + #Import huc8 layer as geodataframe and retain necessary columns + huc8 = gpd.read_file(wbd_huc8_path, layer = 'WBDHU8') + huc8 = huc8[['HUC8','name','states', 'geometry']] + #Define EPSG codes for possible latlon datum names (default of NAD83 if unassigned) + crs_lookup ={'NAD27':'EPSG:4267', 'NAD83':'EPSG:4269', 'WGS84': 'EPSG:4326'} + #Create empty geodataframe and define CRS for potential horizontal datums + metadata_gdf = gpd.GeoDataFrame() + #Iterate through each site + for metadata in metadata_list: + #Convert metadata to json + df = pd.json_normalize(metadata) + #Columns have periods due to nested dictionaries + df.columns = df.columns.str.replace('.', '_') + #Drop any metadata sites that don't have lat/lon populated + df.dropna(subset = ['identifiers_nws_lid','usgs_preferred_latitude', 'usgs_preferred_longitude'], inplace = True) + #If dataframe still has data + if not df.empty: + #Get horizontal datum + h_datum = df['usgs_preferred_latlon_datum_name'].item() + #Look up EPSG code, if not returned Assume NAD83 as default. + dict_crs = crs_lookup.get(h_datum,'EPSG:4269_ Assumed') + #We want to know what sites were assumed, hence the split. + src_crs, *message = dict_crs.split('_') + #Convert dataframe to geodataframe using lat/lon (USGS). Add attribute of assigned crs (label ones that are assumed) + site_gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['usgs_preferred_longitude'], df['usgs_preferred_latitude']), crs = src_crs) + #Field to indicate if a latlon datum was assumed + site_gdf['assigned_crs'] = src_crs + ''.join(message) + + #Reproject to huc 8 crs + site_gdf = site_gdf.to_crs(huc8.crs) + #Append site geodataframe to metadata geodataframe + metadata_gdf = metadata_gdf.append(site_gdf, ignore_index = True) + + #Trim metadata to only have certain fields. + if not retain_attributes: + metadata_gdf = metadata_gdf[['identifiers_nwm_feature_id', 'identifiers_nws_lid', 'identifiers_usgs_site_code', 'geometry']] + #If a list of attributes is supplied then use that list. + elif isinstance(retain_attributes,list): + metadata_gdf = metadata_gdf[retain_attributes] + + #Perform a spatial join to get the WBD HUC 8 assigned to each AHPS + joined_gdf = gpd.sjoin(metadata_gdf, huc8, how = 'inner', op = 'intersects', lsuffix = 'ahps', rsuffix = 'wbd') + joined_gdf = joined_gdf.drop(columns = 'index_wbd') + + #Remove all Alaska HUCS (Not in NWM v2.0 domain) + joined_gdf = joined_gdf[~joined_gdf.states.str.contains('AK')] + + #Create a dictionary of huc [key] and nws_lid[value] + dictionary = joined_gdf.groupby('HUC8')['identifiers_nws_lid'].apply(list).to_dict() + + return dictionary, joined_gdf + +######################################################################## +def mainstem_nwm_segs(metadata_url, list_of_sites): + ''' + Define the mainstems network. Currently a 4 pass approach that probably needs refined. + Once a final method is decided the code can be shortened. Passes are: + 1) Search downstream of gages designated as upstream. This is done to hopefully reduce the issue of mapping starting at the nws_lid. 91038 segments + 2) Search downstream of all LID that are rfc_forecast_point = True. Additional 48,402 segments + 3) Search downstream of all evaluated sites (sites with detailed FIM maps) Additional 222 segments + 4) Search downstream of all sites in HI/PR (locations have no rfc_forecast_point = True) Additional 408 segments + + Parameters + ---------- + metadata_url : STR + URL of API. 
+ list_of_sites : LIST + List of evaluated sites. + + Returns + ------- + ms_nwm_segs_set : SET + Mainstems network segments as a set. + + ''' + + #Define the downstream trace distance + downstream_trace_distance = 'all' + + #Trace downstream from all 'headwater' usgs gages + select_by = 'tag' + selector = ['usgs_gages_ii_ref_headwater'] + must_include = None + gages_list, gages_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Trace downstream from all rfc_forecast_point. + select_by = 'nws_lid' + selector = ['all'] + must_include = 'nws_data.rfc_forecast_point' + fcst_list, fcst_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Trace downstream from all evaluated ahps sites. + select_by = 'nws_lid' + selector = list_of_sites + must_include = None + eval_list, eval_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Trace downstream from all sites in HI/PR. + select_by = 'state' + selector = ['HI','PR'] + must_include = None + islands_list, islands_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Combine all lists of metadata dictionaries into a single list. + combined_lists = gages_list + fcst_list + eval_list + islands_list + #Define list that will contain all segments listed in metadata. + all_nwm_segments = [] + #For each lid metadata dictionary in list + for lid in combined_lists: + #get all downstream segments + downstream_nwm_segs = lid.get('downstream_nwm_features') + #Append downstream segments + if downstream_nwm_segs: + all_nwm_segments.extend(downstream_nwm_segs) + #Get the nwm feature id associated with the location + location_nwm_seg = lid.get('identifiers').get('nwm_feature_id') + if location_nwm_seg: + #Append nwm segment (conver to list) + all_nwm_segments.extend([location_nwm_seg]) + #Remove duplicates by assigning to a set. + ms_nwm_segs_set = set(all_nwm_segments) + + return ms_nwm_segs_set + +############################################################################## +#Function to create list of NWM segments +############################################################################### +def get_nwm_segs(metadata): + ''' + Using the metadata output from "get_metadata", output the NWM segments. + + Parameters + ---------- + metadata : DICT + Dictionary output from "get_metadata" function. + + Returns + ------- + all_segments : LIST + List of all NWM segments. 
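+
+    Example
+    -------
+    Illustrative use together with flow_data (defined below); the flow value is a placeholder:
+        segs = get_nwm_segs(metadata_list[0])
+        flow_df = flow_data(segs, 5000.0)  # ~141.58 cms after the cfs-to-cms conversion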
+ + ''' + + nwm_feature_id = metadata.get('identifiers').get('nwm_feature_id') + upstream_nwm_features = metadata.get('upstream_nwm_features') + downstream_nwm_features = metadata.get('downstream_nwm_features') + + all_segments = [] + #Convert NWM feature id segment to a list (this is always a string or empty) + if nwm_feature_id: + nwm_feature_id = [nwm_feature_id] + all_segments.extend(nwm_feature_id) + #Add all upstream segments (always a list or empty) + if upstream_nwm_features: + all_segments.extend(upstream_nwm_features) + #Add all downstream segments (always a list or empty) + if downstream_nwm_features: + all_segments.extend(downstream_nwm_features) + + return all_segments + +####################################################################### +#Thresholds +####################################################################### +def get_thresholds(threshold_url, select_by, selector, threshold = 'all'): + ''' + Get nws_lid threshold stages and flows (i.e. bankfull, action, minor, + moderate, major). Returns a dictionary for stages and one for flows. + + Parameters + ---------- + threshold_url : STR + WRDS threshold API. + select_by : STR + Type of site (nws_lid, usgs_site_code etc). + selector : STR + Site for selection. Must be a single site. + threshold : STR, optional + Threshold option. The default is 'all'. + + Returns + ------- + stages : DICT + Dictionary of stages at each threshold. + flows : DICT + Dictionary of flows at each threshold. + + ''' + params = {} + params['threshold'] = threshold + url = f'{threshold_url}/{select_by}/{selector}' + response = requests.get(url, params = params) + if response.ok: + thresholds_json = response.json() + #Get metadata + thresholds_info = thresholds_json['value_set'] + #Initialize stages/flows dictionaries + stages = {} + flows = {} + #Check if thresholds information is populated. If site is non-existent thresholds info is blank + if thresholds_info: + #Get all rating sources and corresponding indexes in a dictionary + rating_sources = {i.get('calc_flow_values').get('rating_curve').get('source'): index for index, i in enumerate(thresholds_info)} + #Get threshold data use USGS Rating Depot (priority) otherwise NRLDB. + if 'USGS Rating Depot' in rating_sources: + threshold_data = thresholds_info[rating_sources['USGS Rating Depot']] + elif 'NRLDB' in rating_sources: + threshold_data = thresholds_info[rating_sources['NRLDB']] + #If neither USGS or NRLDB is available use first dictionary to get stage values. + else: + threshold_data = thresholds_info[0] + #Get stages and flows for each threshold + if threshold_data: + stages = threshold_data['stage_values'] + flows = threshold_data['calc_flow_values'] + #Add source information to stages and flows. Flows source inside a nested dictionary. Remove key once source assigned to flows. + stages['source'] = threshold_data.get('metadata').get('threshold_source') + flows['source'] = flows.get('rating_curve', {}).get('source') + flows.pop('rating_curve', None) + #Add timestamp WRDS data was retrieved. 
+ stages['wrds_timestamp'] = response.headers['Date'] + flows['wrds_timestamp'] = response.headers['Date'] + #Add Site information + stages['nws_lid'] = threshold_data.get('metadata').get('nws_lid') + flows['nws_lid'] = threshold_data.get('metadata').get('nws_lid') + stages['usgs_site_code'] = threshold_data.get('metadata').get('usgs_site_code') + flows['usgs_site_code'] = threshold_data.get('metadata').get('usgs_site_code') + stages['units'] = threshold_data.get('metadata').get('stage_units') + flows['units'] = threshold_data.get('metadata').get('calc_flow_units') + return stages, flows + +######################################################################## +# Function to write flow file +######################################################################## +def flow_data(segments, flows, convert_to_cms = True): + ''' + Given a list of NWM segments and a flow value in cfs, convert flow to + cms and return a DataFrame that is set up for export to a flow file. + + Parameters + ---------- + segments : LIST + List of NWM segments. + flows : FLOAT + Flow in CFS. + convert_to_cms : BOOL + Flag to indicate if supplied flows should be converted to metric. + Default value is True (assume input flows are CFS). + + Returns + ------- + flow_data : DataFrame + Dataframe ready for export to a flow file. + + ''' + if convert_to_cms: + #Convert cfs to cms + cfs_to_cms = 0.3048**3 + flows_cms = round(flows * cfs_to_cms,2) + else: + flows_cms = round(flows,2) + + flow_data = pd.DataFrame({'feature_id':segments, 'discharge':flows_cms}) + flow_data = flow_data.astype({'feature_id' : int , 'discharge' : float}) + return flow_data +####################################################################### +#Function to get datum information +####################################################################### +def get_datum(metadata): + ''' + Given a record from the metadata endpoint, retrieve important information + related to the datum and site from both NWS and USGS sources. This information + is saved to a dictionary with common keys. USGS has more data available so + it has more keys. + + Parameters + ---------- + metadata : DICT + Single record from the get_metadata function. Must iterate through + the get_metadata output list. + + Returns + ------- + nws_datums : DICT + Dictionary of NWS data. + usgs_datums : DICT + Dictionary of USGS Data. + + ''' + #Get site and datum information from nws sub-dictionary. Use consistent naming between USGS and NWS sources. + nws_datums = {} + nws_datums['nws_lid'] = metadata['identifiers']['nws_lid'] + nws_datums['usgs_site_code'] = metadata['identifiers']['usgs_site_code'] + nws_datums['state'] = metadata['nws_data']['state'] + nws_datums['datum'] = metadata['nws_data']['zero_datum'] + nws_datums['vcs'] = metadata['nws_data']['vertical_datum_name'] + nws_datums['lat'] = metadata['nws_data']['latitude'] + nws_datums['lon'] = metadata['nws_data']['longitude'] + nws_datums['crs'] = metadata['nws_data']['horizontal_datum_name'] + nws_datums['source'] = 'nws_data' + + #Get site and datum information from usgs_data sub-dictionary. Use consistent naming between USGS and NWS sources. 
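+    # (Illustrative shape of the two returned dictionaries, values made up:
+    #  {'nws_lid': 'abcd1', 'usgs_site_code': '01234567', 'datum': 610.2, 'vcs': 'NGVD29',
+    #   'lat': 35.1, 'lon': -90.2, 'crs': 'NAD83', 'source': 'usgs_data', ...})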
+ usgs_datums = {} + usgs_datums['nws_lid'] = metadata['identifiers']['nws_lid'] + usgs_datums['usgs_site_code'] = metadata['identifiers']['usgs_site_code'] + usgs_datums['active'] = metadata['usgs_data']['active'] + usgs_datums['state'] = metadata['usgs_data']['state'] + usgs_datums['datum'] = metadata['usgs_data']['altitude'] + usgs_datums['vcs'] = metadata['usgs_data']['alt_datum_code'] + usgs_datums['datum_acy'] = metadata['usgs_data']['alt_accuracy_code'] + usgs_datums['datum_meth'] = metadata['usgs_data']['alt_method_code'] + usgs_datums['lat'] = metadata['usgs_data']['latitude'] + usgs_datums['lon'] = metadata['usgs_data']['longitude'] + usgs_datums['crs'] = metadata['usgs_data']['latlon_datum_name'] + usgs_datums['source'] = 'usgs_data' + + return nws_datums, usgs_datums +######################################################################## +#Function to convert horizontal datums +######################################################################## +def convert_latlon_datum(lat,lon,src_crs,dest_crs): + ''' + Converts latitude and longitude datum from a source CRS to a dest CRS + using geopandas and returns the projected latitude and longitude coordinates. + + Parameters + ---------- + lat : FLOAT + Input Latitude. + lon : FLOAT + Input Longitude. + src_crs : STR + CRS associated with input lat/lon. Geopandas must recognize code. + dest_crs : STR + Target CRS that lat/lon will be projected to. Geopandas must recognize code. + + Returns + ------- + new_lat : FLOAT + Reprojected latitude coordinate in dest_crs. + new_lon : FLOAT + Reprojected longitude coordinate in dest_crs. + + ''' + + #Create a temporary DataFrame containing the input lat/lon. + temp_df = pd.DataFrame({'lat':[lat],'lon':[lon]}) + #Convert dataframe to a GeoDataFrame using the lat/lon coords. Input CRS is assigned. + temp_gdf = gpd.GeoDataFrame(temp_df, geometry=gpd.points_from_xy(temp_df.lon, temp_df.lat), crs = src_crs) + #Reproject GeoDataFrame to destination CRS. + reproject = temp_gdf.to_crs(dest_crs) + #Get new Lat/Lon coordinates from the geometry data. + new_lat,new_lon = [reproject.geometry.y.item(), reproject.geometry.x.item()] + return new_lat, new_lon +####################################################################### +#Function to get conversion adjustment NGVD to NAVD in FEET +####################################################################### +def ngvd_to_navd_ft(datum_info, region = 'contiguous'): + ''' + Given the lat/lon, retrieve the adjustment from NGVD29 to NAVD88 in feet. + Uses NOAA tidal API to get conversion factor. Requires that lat/lon is + in NAD27 crs. If input lat/lon are not NAD27 then these coords are + reprojected to NAD27 and the reproject coords are used to get adjustment. + There appears to be an issue when region is not in contiguous US. + + Parameters + ---------- + lat : FLOAT + Latitude. + lon : FLOAT + Longitude. + + Returns + ------- + datum_adj_ft : FLOAT + Vertical adjustment in feet, from NGVD29 to NAVD88, and rounded to nearest hundredth. + + ''' + #If crs is not NAD 27, convert crs to NAD27 and get adjusted lat lon + if datum_info['crs'] != 'NAD27': + lat, lon = convert_latlon_datum(datum_info['lat'],datum_info['lon'],datum_info['crs'],'NAD27') + else: + #Otherwise assume lat/lon is in NAD27. + lat = datum_info['lat'] + lon = datum_info['lon'] + + #Define url for datum API + datum_url = 'https://vdatum.noaa.gov/vdatumweb/api/tidal' + + #Define parameters. Hard code most parameters to convert NGVD to NAVD. 
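+    # Illustrative request that the parameters below build (coordinate values are placeholders):
+    #   GET https://vdatum.noaa.gov/vdatumweb/api/tidal?lat=35.1&lon=-90.2&region=contiguous
+    #       &s_h_frame=NAD27&s_v_frame=NGVD29&s_vertical_unit=m&src_height=0.0&t_v_frame=NAVD88&tar_vertical_unit=m
+    # The 'tar_height' field of the JSON response (meters) is converted to feet below.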
+ params = {} + params['lat'] = lat + params['lon'] = lon + params['region'] = region + params['s_h_frame'] = 'NAD27' #Source CRS + params['s_v_frame'] = 'NGVD29' #Source vertical coord datum + params['s_vertical_unit'] = 'm' #Source vertical units + params['src_height'] = 0.0 #Source vertical height + params['t_v_frame'] = 'NAVD88' #Target vertical datum + params['tar_vertical_unit'] = 'm' #Target vertical height + + #Call the API + response = requests.get(datum_url, params = params) + #If succesful get the navd adjustment + if response: + results = response.json() + #Get adjustment in meters (NGVD29 to NAVD88) + adjustment = results['tar_height'] + #convert meters to feet + adjustment_ft = round(float(adjustment) * 3.28084,2) + else: + adjustment_ft = None + return adjustment_ft +####################################################################### +#Function to download rating curve from API +####################################################################### +def get_rating_curve(rating_curve_url, location_ids): + ''' + Given list of location_ids (nws_lids, usgs_site_codes, etc) get the + rating curve from WRDS API and export as a DataFrame. + + Parameters + ---------- + rating_curve_url : STR + URL to retrieve rating curve + location_ids : LIST + List of location ids. Can be nws_lid or usgs_site_codes. + + Returns + ------- + all_curves : pandas DataFrame + Rating curves from input list as well as other site information. + + ''' + #Define DataFrame to contain all returned curves. + all_curves = pd.DataFrame() + + #Define call to retrieve all rating curve information from WRDS. + joined_location_ids = '%2C'.join(location_ids) + url = f'{rating_curve_url}/{joined_location_ids}' + + #Call the API + response = requests.get(url) + + #If successful + if response.ok: + + #Write return to json and extract the rating curves + site_json = response.json() + rating_curves_list = site_json['rating_curves'] + + #For each curve returned + for curve in rating_curves_list: + #Check if a curve was populated (e.g wasn't blank) + if curve: + + #Write rating curve to pandas dataframe as well as site attributes + curve_df = pd.DataFrame(curve['rating_curve'],dtype=float) + + #Add other information such as site, site type, source, units, and timestamp. + curve_df['location_id'] = curve['metadata']['location_id'] + curve_df['location_type'] = curve['metadata']['id_type'] + curve_df['source'] = curve['metadata']['source'] + curve_df['flow_units'] = curve['metadata']['flow_unit'] + curve_df['stage_units'] = curve['metadata']['stage_unit'] + curve_df['wrds_timestamp'] = response.headers['Date'] + + #Append rating curve to DataFrame containing all curves + all_curves = all_curves.append(curve_df) + else: + continue + + return all_curves +####################################################################### +#Following Functions used for preprocesing of AHPS sites (NWS and USGS) +######################################################################## + +####################################################################### +#Function to return a correct maps. +######################################################################## +def select_grids(dataframe, stages, datum88, buffer): + ''' + Given a DataFrame (in a specific format), and a dictionary of stages, and the datum (in navd88). + loop through the available inundation datasets and find the datasets that are equal to or immediately above the thresholds and only return 1 dataset per threshold (if any). 
+#######################################################################
+#Following functions are used for preprocessing of AHPS sites (NWS and USGS)
+########################################################################
+
+#######################################################################
+#Function to select the correct maps.
+########################################################################
+def select_grids(dataframe, stages, datum88, buffer):
+    '''
+    Given a DataFrame (in a specific format), a dictionary of stages, and the
+    datum (in NAVD88), loop through the available inundation datasets, find the
+    dataset that is equal to or immediately above each threshold, and return at
+    most one dataset per threshold.
+
+    Parameters
+    ----------
+    dataframe : DataFrame
+        DataFrame that has to be in a specific format and contains the stages and paths to the inundation datasets.
+    stages : DICT
+        Dictionary of thresholds (key) and stages (values)
+    datum88 : FLOAT
+        The datum associated with the LID, pre-converted to NAVD88 (if needed)
+    buffer : FLOAT
+        Interval within which the upper bound is assigned (threshold + buffer = upper bound). Recommended to make buffer 0.1 greater than the desired interval because the code selects maps using < and not <=.
+
+    Returns
+    -------
+    maps : DICT
+        Dictionary of threshold (key) and inundation dataset path (value)
+    map_flows : DICT
+        Dictionary of threshold (key) and flow in CFS, rounded to the nearest whole number, associated with the selected map (value)
+
+    '''
+    #Define threshold categories
+    thresholds = ['action', 'minor', 'moderate', 'major']
+    maps = {}
+    map_flows = {}
+    #For each threshold, pick the appropriate map for analysis.
+    for i,threshold in enumerate(thresholds):
+        #Check if stage is None
+        if stages[threshold] is not None:
+            #Define the threshold floor elevation (NAVD88).
+            lower_bound = round((stages[threshold] + datum88),1)
+            #Define the threshold ceiling (NAVD88)
+            upper_bound = round((stages[threshold] + datum88 + buffer),1)
+            #For thresholds that are action, minor, moderate
+            if threshold in ['action', 'minor', 'moderate']:
+                #Make sure the next threshold has a valid stage
+                if stages[thresholds[i+1]] is None:
+                    next_threshold = upper_bound
+                else:
+                    #Determine what the next threshold elevation is.
+                    next_threshold = round((stages[thresholds[i+1]] + datum88),1)
+                #Make sure the upper_bound is not greater than the next threshold; if it is, then reassign upper_bound.
+                if upper_bound > next_threshold:
+                    upper_bound = next_threshold
+                #Get the single map which meets the criteria.
+                value = dataframe.query(f'({lower_bound}<=elevation) & (elevation<{upper_bound})')['elevation'].min()
+            #For major threshold
+            else:
+                #Get the single map which meets criteria.
+                value = dataframe.query(f'({lower_bound}<=elevation) & (elevation<{upper_bound})')['elevation'].min()
+
+            #If the selected value is a number
+            if np.isfinite(value):
+                #Get the map path and the flow associated with the map (rounded to nearest whole number)
+                map_path = dataframe.query(f'elevation == {value}')['path'].item()
+                map_flow = round(dataframe.query(f'elevation == {value}')['flow'].item(),0)
+                #Check to see if map_flow is valid (if beyond the rating curve it is NaN)
+                if not np.isfinite(map_flow):
+                    map_path = 'No Flow'
+                    map_flow = 'No Flow'
+
+            #If the selected value is not a number (no map elevation falls within the bounds), then map_path and map_flow are both set to 'No Map'.
+            else:
+                map_path = 'No Map'
+                map_flow = 'No Map'
+        else:
+            map_path = 'No Threshold'
+            map_flow = 'No Threshold'
+
+        #Write map paths and flows to dictionary
+        maps[threshold] = map_path
+        map_flows[threshold] = map_flow
+
+    #Get the maximum inundation map (using elevation); this will be the domain extent
+    max_value = dataframe['elevation'].max()
+    map_path = dataframe.query(f'elevation == {max_value}')['path'].item()
+    map_flow = 'Not Used'
+    maps['extent'] = map_path
+    map_flows['extent'] = map_flow
+
+    return maps,map_flows
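
To make the selection logic concrete, a small fabricated example (column names follow the docstring; the paths, stages, flows, and module path are invented):

# Hypothetical usage sketch with a hand-built DataFrame.
import pandas as pd
from tools.tools_shared_functions import select_grids

grids = pd.DataFrame({'elevation': [10.0, 11.5, 13.0, 15.2],
                      'flow':      [500.0, 800.0, 1200.0, 2500.0],
                      'path':      ['e10_0.tif', 'e11_5.tif', 'e13_0.tif', 'e15_2.tif']})
stages = {'action': 2.0, 'minor': 3.5, 'moderate': 5.0, 'major': None}

maps, map_flows = select_grids(grids, stages, datum88 = 8.0, buffer = 1.1)
# maps -> {'action': 'e10_0.tif', 'minor': 'e11_5.tif', 'moderate': 'e13_0.tif',
#          'major': 'No Threshold', 'extent': 'e15_2.tif'}
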
+#######################################################################
+#Process AHPS Extent Grid (Fill Holes)
+#######################################################################
+def process_extent(extent, profile, output_raster = False):
+    '''
+    Convert the extent raster to a vector feature (using raster_to_feature); the footprint is used, so all raster values are set to 1 where there is data.
+    Fill all donut holes in the resulting feature.
+    The filled geometry is then converted back to a raster using the same raster properties as the input profile.
+    The output raster is encoded as follows:
+        filled footprint (wet) = 1
+        remaining area in raster domain (dry) = 0 (also assigned as the NoData value)
+
+    Parameters
+    ----------
+    extent : rasterio Dataset Reader
+        Dataset reader of the extent raster
+    profile : rasterio Profile
+        Profile associated with the extent argument
+    output_raster : STR
+        Path to output raster. If no path is supplied, then no raster is written to disk. default = False
+
+    Returns (if no output raster is specified)
+    -------
+    extent_filled_raster : numpy Array
+        Extent raster array with filled donut holes
+    profile : rasterio profile
+        Profile associated with extent_filled_raster
+
+    '''
+
+    #Convert extent to feature and explode geometry
+    poly_extent = raster_to_feature(extent, profile, footprint_only = True)
+    poly_extent = poly_extent.explode()
+
+    #Fill holes in extent
+    poly_extent_fill_holes = MultiPolygon(Polygon(p.exterior) for p in poly_extent['geometry'])
+    #Loop through the filled polygons and insert the new geometry
+    for i in range(len(poly_extent_fill_holes)):
+        poly_extent.loc[i,'geometry'] = poly_extent_fill_holes[i]
+
+    #Dissolve filled holes with main map and explode
+    poly_extent['dissolve_field'] = 1
+    poly_extent = poly_extent.dissolve(by = 'dissolve_field')
+    poly_extent = poly_extent.explode()
+    poly_extent = poly_extent.reset_index()
+
+    #Convert filled polygon back to raster
+    extent_filled_raster = features.rasterize(((geometry, 1) for geometry in poly_extent['geometry']), fill = 0, dtype = 'int32', transform = profile['transform'], out_shape = (profile['height'], profile['width']))
+
+    #Update profile properties (dtype and nodata)
+    profile.update(dtype = rasterio.int32)
+    profile.update(nodata = 0)
+
+    #Check if an output raster is specified. If so, then write the filled extent raster to disk.
+    if output_raster:
+        #Create directory
+        Path(output_raster).parent.mkdir(parents = True, exist_ok = True)
+        with rasterio.Env():
+            with rasterio.open(output_raster, 'w', **profile) as dst:
+                dst.write(extent_filled_raster, 1)
+    #If no output raster is supplied, then return the raster array and profile.
+    else:
+        return extent_filled_raster, profile
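
A hedged sketch of filling an AHPS extent grid with process_extent; the module path and file paths are placeholders.

# Hypothetical usage sketch (placeholder paths).
import rasterio
from tools.tools_shared_functions import process_extent

with rasterio.open('/data/ahps/site/extent.tif') as extent:
    # Return the filled footprint in memory...
    filled_arr, filled_profile = process_extent(extent, extent.profile)
    # ...or write it straight to disk instead (nothing is returned in that case):
    # process_extent(extent, extent.profile, output_raster = '/data/ahps/site/extent_filled.tif')
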
+########################################################################
+#Convert raster to polygon
+########################################################################
+def raster_to_feature(grid, profile_override = False, footprint_only = False):
+    '''
+    Given a grid (path or open dataset), convert it to a vector GeoDataFrame dissolved by grid value.
+
+    Parameters
+    ----------
+    grid : pathlib Path OR rasterio Dataset Reader
+        Path to grid or a rasterio Dataset Reader
+    profile_override : rasterio Profile
+        Default is False. If a rasterio Profile is supplied, it will dictate the transform and crs.
+    footprint_only : BOOL
+        If True, all valid data values are set to 1 so only the data footprint is converted. If False, all values in the grid are carried through in the raster to feature conversion. default = False
+
+    Returns
+    -------
+    dissolve_geodataframe : GeoDataFrame
+        Dissolved (by grid value) vector data in a GeoDataFrame.
+
+    '''
+    #Determine what format the input grid is:
+    #If a pathlib path, open with rasterio
+    if isinstance(grid, pathlib.PurePath):
+        dataset = rasterio.open(grid)
+    #If a rasterio dataset object, assign to dataset
+    elif isinstance(grid, rasterio.DatasetReader):
+        dataset = grid
+
+    #Get data/mask and profile properties from dataset
+    data = dataset.read(1)
+    msk = dataset.read_masks(1)
+    data_transform = dataset.transform
+    coord_sys = dataset.crs
+
+    #If a profile override was supplied, use it to get the transform and coordinate system.
+    if profile_override:
+        data_transform = profile_override['transform']
+        coord_sys = profile_override['crs']
+
+    #If a footprint of the raster is desired, convert all data values to 1
+    if footprint_only:
+        data[msk == 255] = 1
+
+    #Convert grid to feature
+    spatial = []
+    values = []
+    for geom, val in rasterio.features.shapes(data, mask = msk, transform = data_transform):
+        spatial.append(shape(geom))
+        values.append(val)
+    spatial_geodataframe = gpd.GeoDataFrame({'values': values,'geometry':spatial }, crs = coord_sys)
+    dissolve_geodataframe = spatial_geodataframe.dissolve(by = 'values')
+    return dissolve_geodataframe
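
A short sketch of raster_to_feature in both modes; the module path and grid path are placeholders.

# Hypothetical usage sketch (placeholder path).
from pathlib import Path
from tools.tools_shared_functions import raster_to_feature

benchmark_grid = Path('/data/ahps/site/benchmark.tif')
# One dissolved polygon per unique raster value.
polys_by_value = raster_to_feature(benchmark_grid)
# Single dissolved data footprint (all valid cells treated as 1).
footprint = raster_to_feature(benchmark_grid, footprint_only = True)
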
+########################################################################
+#Create AHPS Benchmark Grid
+########################################################################
+def process_grid(benchmark, benchmark_profile, domain, domain_profile, reference_raster):
+    '''
+    Given a benchmark grid and profile, a domain rasterio dataset and profile, and a reference raster,
+    match the benchmark dataset to the domain extent and create a classified grid encoded as:
+        2 (NoData footprint of the domain)
+        0 (NoData region of the benchmark within the domain)
+        1 (data footprint of the benchmark)
+    Then reproject the classified benchmark grid to match the reference grid resolution and crs.
+    Output is an array of values and a profile.
+
+    Parameters
+    ----------
+    benchmark : rasterio dataset
+        Rasterio dataset of the benchmark dataset for a given threshold
+    benchmark_profile : rasterio profile
+        A potentially modified profile of the benchmark dataset.
+    domain : rasterio dataset
+        Rasterio dataset of the domain grid (the maximum available grid for a given site)
+    domain_profile : rasterio profile
+        A potentially modified profile of the domain dataset.
+    reference_raster : pathlib Path
+        Path to reference dataset.
+
+    Returns
+    -------
+    classified_benchmark_projected : numpy Array
+        Array of values for the classified benchmark grid.
+    profile : rasterio profile
+        Updated, final profile of the classified benchmark grid.
+
+    '''
+
+    #Make benchmark have same dimensions as domain (assume domain has same CRS as benchmark)
+    #Get source CRS (benchmark and domain assumed to be same CRS)
+    source_crs = benchmark_profile['crs'].to_wkt()
+    #Get domain data
+    domain_arr = domain.read(1)
+    #Get benchmark data
+    benchmark_arr = benchmark.read(1)
+    #Create empty array with same dimensions as domain
+    benchmark_fit_to_domain = np.empty(domain_arr.shape)
+    #Make benchmark have same footprint as domain (assume domain has same CRS as benchmark)
+    reproject(benchmark_arr,
+              destination = benchmark_fit_to_domain,
+              src_transform = benchmark.transform,
+              src_crs = source_crs,
+              src_nodata = benchmark.nodata,
+              dst_transform = domain.transform,
+              dst_crs = source_crs,
+              dst_nodata = benchmark.nodata,
+              resampling = Resampling.bilinear)
+    #Convert fitted benchmark dataset to boolean. 0 = NODATA regions and 1 = data regions
+    benchmark_fit_to_domain_bool = np.where(benchmark_fit_to_domain == benchmark.nodata,0,1)
+    #Merge domain data mask and benchmark data mask. new_nodata_value (2) = domain NODATA footprint, 0 = NODATA for benchmark (within data region of domain), 1 = data region of benchmark.
+    new_nodata_value = 2
+    classified_benchmark = np.where(domain_arr == domain.nodata, new_nodata_value, benchmark_fit_to_domain_bool)
+
+    ##Reproject classified benchmark to reference raster crs and resolution.
+    #Read in reference raster
+    reference = rasterio.open(reference_raster)
+    #Determine the new transform and dimensions of the reprojected/resampled classified benchmark dataset whose width, height, and bounds are the same as the domain dataset.
+    new_benchmark_transform, new_benchmark_width, new_benchmark_height = calculate_default_transform(source_crs, reference.crs, domain.width, domain.height, *domain.bounds, resolution = reference.res)
+    #Define an empty array with the same dimensions as output by the "calculate_default_transform" command.
+    classified_benchmark_projected = np.empty((new_benchmark_height,new_benchmark_width), dtype=np.uint8)
+    #Reproject and resample the classified benchmark dataset. Nearest neighbor resampling due to integer values of classified benchmark.
+    reproject(classified_benchmark,
+              destination = classified_benchmark_projected,
+              src_transform = domain.transform,
+              src_crs = source_crs,
+              src_nodata = new_nodata_value,
+              dst_transform = new_benchmark_transform,
+              dst_crs = reference.crs,
+              dst_nodata = new_nodata_value,
+              dst_resolution = reference.res,
+              resampling = Resampling.nearest)
+
+    #Update profile using reference profile as base (data type, NODATA, transform, width/height).
+    profile = reference.profile
+    profile.update(transform = new_benchmark_transform)
+    profile.update(dtype = rasterio.uint8)
+    profile.update(nodata = new_nodata_value)
+    profile.update(width = new_benchmark_width)
+    profile.update(height = new_benchmark_height)
+    return classified_benchmark_projected, profile
\ No newline at end of file
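
A hedged end-to-end sketch of process_grid; the module path and the benchmark, domain, and reference paths are placeholders, and the output write simply reuses the returned profile.

# Hypothetical usage sketch (placeholder paths).
import rasterio
from tools.tools_shared_functions import process_grid

with rasterio.open('/data/ahps/site/moderate_extent.tif') as benchmark, \
     rasterio.open('/data/ahps/site/domain_extent.tif') as domain:
    classified, out_profile = process_grid(benchmark, benchmark.profile,
                                           domain, domain.profile,
                                           '/data/inputs/reference_grid.tif')
with rasterio.open('/data/ahps/site/moderate_classified.tif', 'w', **out_profile) as dst:
    dst.write(classified, 1)
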
diff --git a/tools/tools_shared_variables.py b/tools/tools_shared_variables.py
new file mode 100755
index 000000000..0c31a6fb8
--- /dev/null
+++ b/tools/tools_shared_variables.py
@@ -0,0 +1,62 @@
+import os
+
+# Environment variables and constants.
+TEST_CASES_DIR = r'/data/test_cases/'
+PREVIOUS_FIM_DIR = r'/data/previous_fim'
+OUTPUTS_DIR = os.environ['outputDataDir']
+INPUTS_DIR = r'/data/inputs'
+AHPS_BENCHMARK_CATEGORIES = ['usgs', 'nws']
+FR_BENCHMARK_CATEGORIES = ['ble', 'ifc']
+BLE_MAGNITUDE_LIST = ['100yr', '500yr']
+IFC_MAGNITUDE_LIST = ['2yr', '5yr', '10yr', '25yr', '50yr', '100yr', '200yr', '500yr']
+AHPS_MAGNITUDE_LIST = ['action', 'minor', 'moderate', 'major']
+
+MAGNITUDE_DICT = {'ble': BLE_MAGNITUDE_LIST, 'ifc': IFC_MAGNITUDE_LIST, 'usgs': AHPS_MAGNITUDE_LIST, 'nws': AHPS_MAGNITUDE_LIST}
+PRINTWORTHY_STATS = ['CSI', 'TPR', 'TNR', 'FAR', 'MCC', 'TP_area_km2', 'FP_area_km2', 'TN_area_km2', 'FN_area_km2', 'contingency_tot_area_km2', 'TP_perc', 'FP_perc', 'TN_perc', 'FN_perc']
+GO_UP_STATS = ['CSI', 'TPR', 'MCC', 'TN_area_km2', 'TP_area_km2', 'TN_perc', 'TP_perc', 'TNR']
+GO_DOWN_STATS = ['FAR', 'FN_area_km2', 'FP_area_km2', 'FP_perc', 'FN_perc']
+
+# Variables for eval_plots.py
+BAD_SITES = [
+    'baki3', #USGS: ratio of evaluated vs domain is very low
+    'cpei3', #USGS: mainstems does not extend sufficiently upstream (~35%), significant masking upstream
+    'eagi1', #NWS: ratio of evaluated vs domain is very low
+    'efdn7', #NWS: mainstems does not extend sufficiently upstream (~30%)
+    'erwn6', #NWS: ratio of evaluated vs domain is very low
+    'grfi2', #USGS: incorrect location
+    'hohn4', #Both: no mainstems in vicinity
+    'kcdm7', #USGS: incorrect location
+    'kilo1', #Both: mainstems does not extend sufficiently upstream (~20%)
+    'ksdm7', #USGS: masked
+    'levk1', #NWS: incorrect feature_id assigned from WRDS, this has been corrected
+    'lkcm7', #NWS: masked
+    'loun7', #NWS: benchmark is not consistent between thresholds
+    'lrlm7', #NWS: masked
+    'mcri2', #USGS: incorrect location
+    'monv1', #NWS: mainstems does not extend sufficiently upstream (~30%)
+    'mtao1', #USGS: ratio of evaluated vs domain is very low
+    'nhso1', #USGS: mainstems does not extend sufficiently upstream (45%)
+    'nmso1', #Both: mainstems does not extend sufficiently upstream
+    'pori3', #USGS: mainstems does not extend sufficiently upstream
+    'ptvn6', #Both: mainstems does not extend sufficiently upstream (50%)
+    'roun6', #USGS: ratio of evaluated vs domain is very low
+    'rwdn4', #Both: no mainstems in vicinity
+    'selt2', #NWS: mainstems does not extend sufficiently upstream (~30%)
+    'sweg1', #Both: mainstems does not extend sufficiently upstream (~30%)
+    'vcni3', #USGS: ratio of evaluated vs domain is very low
+    'watw3', #NWS: mainstems does not extend sufficiently upstream (~30%)
+    'weat2', #NWS: mainstems does not extend sufficiently upstream (~50%)
+    'wkew3'  #NWS: mainstems does not extend sufficiently upstream (~45%)
+    ]
+DISCARD_AHPS_QUERY = "not flow.isnull() & masked_perc<97 & not nws_lid in @BAD_SITES"
+
+elev_raster_ndv = -9999.0
+
+# Colors.
+ENDC = '\033[m'
+TGREEN_BOLD = '\033[32;1m'
+TGREEN = '\033[32m'
+TRED_BOLD = '\033[31;1m'
+TWHITE = '\033[37m'
+WHITE_BOLD = '\033[37;1m'
+CYAN_BOLD = '\033[36;1m'
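
For orientation, a brief sketch of how a couple of these constants might be consumed by the evaluation scripts; the metrics table is fabricated, the import path is assumed, and importing the module requires the outputDataDir environment variable to be set (it is read via os.environ). DISCARD_AHPS_QUERY is intended for DataFrame.query-style filtering of AHPS metrics.

# Hypothetical usage sketch (fabricated metrics table, assumed module path).
import pandas as pd
from tools.tools_shared_variables import MAGNITUDE_DICT, BAD_SITES

print(MAGNITUDE_DICT['ble'])   # ['100yr', '500yr']

metrics = pd.DataFrame({'nws_lid': ['hohn4', 'abcd1'], 'CSI': [0.55, 0.71]})
# Drop sites flagged in BAD_SITES before aggregating or plotting.
metrics = metrics[~metrics['nws_lid'].isin(BAD_SITES)]
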