Cahaba Codebase Cleanup - Small Items
General repository cleanup: memory profiling is now an optional flag, and the API's release feature now saves its outputs.

- Remove Dockerfile.prod, rename Dockerfile.dev to just Dockerfile, and remove `.dockerignore`.
- Clean up Dockerfile and remove any unused* packages or variables.
- Remove any unused* Python packages from the Pipfile.
- Move the CHANGELOG.md, SECURITY.md, and TERMS.md files to the /docs folder.
- Remove any unused* scripts in the /tools and /src folders.
- Move tools/preprocess scripts into tools/.
- Ensure all scripts in the /src folder have their code in functions and are called via a `__main__` block (this will help with fully implementing memory profiling; see the sketch below).
- Changed memory-profiling to be an optional flag (-m) for fim_run.sh.
- Updated FIM API to save all outputs during a "release" job.

This resolves #432, resolves #426, and resolves #434.
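
Below is a rough, hedged sketch of the layout the bullets above describe: each src/ script keeps its logic in functions and only parses arguments under the `__main__` guard. The function and argument names here are invented for illustration and are not taken from the repository.

```python
import argparse


def process_huc(huc, output_dir):
    """Placeholder for a script's real work, kept out of module scope."""
    print(f"processing {huc} into {output_dir}")


if __name__ == '__main__':
    # Argument parsing stays under the __main__ guard so the function above
    # can be imported (and wrapped by a profiler) without side effects.
    parser = argparse.ArgumentParser(description='example src/ script layout')
    parser.add_argument('-u', '--huc', required=True, help='HUC to process')
    parser.add_argument('-o', '--output-dir', required=True, help='output directory')
    args = vars(parser.parse_args())

    process_huc(**args)
```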
nickchadwick-noaa authored Aug 18, 2021
1 parent 7aa4ab9 commit 8949dc3
Showing 59 changed files with 598 additions and 1,371 deletions.
1 change: 0 additions & 1 deletion .dockerignore

This file was deleted.

7 changes: 3 additions & 4 deletions Dockerfile.dev → Dockerfile
@@ -6,17 +6,16 @@ ARG projectDir=/foss_fim
ARG depDir=/dependencies
ARG taudemVersion=98137bb6541a0d0077a9c95becfed4e56d0aa0ac
ARG taudemVersion2=81f7a07cdd3721617a30ee4e087804fddbcffa88
ENV DEBIAN_FRONTEND noninteractive
ENV taudemDir=$depDir/taudem/bin
ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin

RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*

RUN git clone https://github.com/dtarb/taudem.git
RUN git clone https://github.com/fernandoa123/cybergis-toolkit.git taudem_accelerated_flowDirections

RUN apt-get update --fix-missing && apt-get install -y cmake mpich \
libgtest-dev libboost-test-dev libnetcdf-dev && rm -rf /var/lib/apt/lists/*
libgtest-dev libboost-test-dev libnetcdf-dev && rm -rf /var/lib/apt/lists/*

## Compile Main taudem repo ##
RUN mkdir -p taudem/bin
@@ -53,7 +52,7 @@ ARG dataDir=/data
ARG projectDir=/foss_fim
ARG depDir=/dependencies
ENV inputDataDir=$dataDir/inputs
ENV outputDataDir=$dataDir/outputs
ENV outputDataDir=$dataDir/outputs
ENV srcDir=$projectDir/src
ENV taudemDir=$depDir/taudem/bin
ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin
100 changes: 0 additions & 100 deletions Dockerfile.prod

This file was deleted.

182 changes: 93 additions & 89 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion README.md
@@ -132,7 +132,7 @@ Please see the issue tracker on GitHub for known issues and for getting help.
NOAA's National Water Center welcomes anyone to contribute to the Cahaba repository to improve flood inundation mapping capabilities. Please contact Brad Bates ([email protected]) or Fernando Salas ([email protected]) to get started.

## Open Source Licensing Info
1. [TERMS](TERMS.md)
1. [TERMS](docs/TERMS.md)
2. [LICENSE](LICENSE)

## Credits and References
32 changes: 18 additions & 14 deletions api/node/updater/updater.py
@@ -75,7 +75,7 @@ def update_loop():
outputs_path = f"/data/outputs/{current_jobs[job_name]['nice_name']}"
if os.path.isdir(outputs_path):
shutil.rmtree(outputs_path)

jobs_to_delete.append(job_name)

active_statuses = [
@@ -113,7 +113,7 @@ def update_loop():
total_active_cores += current_jobs[j]['parallel_jobs'] * 5
else:
total_active_cores += current_jobs[j]['parallel_jobs']

# Machine has enough resources to run a new job
potential_active_cores = 0
if current_jobs[job_name]['hucs_type'] == '6':
@@ -170,16 +170,21 @@ def update_loop():
current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys())
current_jobs[job_name]['status'] = 'Ready to Save File'
elif current_jobs[job_name]['job_type'] == 'release':
# Move outputs to previous_fim and set them to be copied to the dev machine
if os.path.isdir(f"/data/previous_fim/{current_jobs[job_name]['nice_name']}"):
shutil.rmtree(f"/data/previous_fim/{current_jobs[job_name]['nice_name']}")
if os.path.isdir(f"/data/outputs/{current_jobs[job_name]['nice_name']}"): shutil.move(f"/data/outputs/{current_jobs[job_name]['nice_name']}", '/data/previous_fim')
for path, folders, files in os.walk(f"/data/previous_fim/{current_jobs[job_name]['nice_name']}"):
for file in files:
current_jobs[job_name]['output_files_saved'][os.path.join(path, file)] = 0
current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys())
current_jobs[job_name]['status'] = 'Ready for Synthesize Test Cases'

if current_jobs[job_name]['status'] == 'Ready for Synthesize Test Cases':
job_name = current_jobs[job_name]['job_name']
nice_name = current_jobs[job_name]['nice_name']
parallel_jobs = current_jobs[job_name]['parallel_jobs']

if os.path.isdir(f"/data/previous_fim/{nice_name}"):
shutil.rmtree(f"/data/previous_fim/{nice_name}")
if os.path.isdir(f"/data/outputs/{nice_name}"): shutil.move(f"/data/outputs/{nice_name}", '/data/previous_fim')
# Kick off the new job as a docker container to run eval metrics
print(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/synthesize_test_cases.py -c PREV --fim-version {nice_name} --job-number {parallel_jobs} -m /data/test_cases/metrics_library/all_official_versions.csv")
subprocess.call(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim -w /foss_fim/tools {DOCKER_IMAGE_PATH} /foss_fim/tools/synthesize_test_cases.py -c PREV --fim-version {nice_name} --job-number {parallel_jobs} -m /data/test_cases/metrics_library/all_official_versions.csv", shell=True)
@@ -205,7 +210,6 @@ def update_loop():
subprocess.call(f"docker container rm {job_name}", shell=True)

current_jobs[job_name]['output_files_saved']['/data/test_cases/metrics_library/all_official_versions.csv'] = 0
current_jobs[job_name]['output_files_saved'][f"/data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/docker.log"] = 0
current_jobs[job_name]['output_files_saved'][f"/data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/synthesize_test_cases_docker.log"] = 0
current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys())
current_jobs[job_name]['status'] = 'Ready for Eval Plots'
@@ -289,7 +293,7 @@ def update_loop():
current_jobs[job_name]['output_files_saved'][f"/data/previous_fim/{current_jobs[job_name]['nice_name']}/logs/generate_categorical_fim.log"] = 0
current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys())
current_jobs[job_name]['status'] = 'Ready to Save File'

# Trigger connector to transmit the outputs to the output_handler
# If the output_handler is offline, it will keep retrying until the output_handler is online
if current_jobs[job_name]['status'] == 'Ready to Save File' and (shared_data['current_saving_job'] == '' or shared_data['current_saving_job'] == current_jobs[job_name]):
@@ -303,15 +307,15 @@ def update_loop():
output_to_save = {'path': path, 'chunk_index': current_jobs[job_name]['output_files_saved'][path]}

if output_to_save != {}:
if shared_data['connected']:
if shared_data['connected']:
sio.emit('ready_for_output_handler', {
'nice_name': current_jobs[job_name]['nice_name'],
'job_name': job_name,
'path': output_to_save['path'],
'chunk_index': output_to_save['chunk_index']
})
current_jobs[job_name]['status'] = 'Saving File'

# Once the output_handler is done getting the outputs and the connector deletes the temp repo source,
# mark as completed
if current_jobs[job_name]['status'] == 'Saving File':
@@ -335,8 +339,8 @@ def update_loop():
if os.path.isdir(destination):
shutil.rmtree(destination)
if os.path.isdir(f"{outputs_path}/aggregate_fim_outputs"): shutil.move(f"{outputs_path}/aggregate_fim_outputs", destination)
if os.path.isdir(f"{outputs_path}/logs"): shutil.move(f"{outputs_path}/logs", f"{destination}/logs")
if os.path.isdir(f"/data/catfim/{current_jobs[job_name]['nice_name']}"): shutil.move(f"/data/catfim/{current_jobs[job_name]['nice_name']}", f"{destination}/catfim")
if os.path.isdir(f"{outputs_path}/logs"): shutil.move(f"{outputs_path}/logs", f"{destination}/logs")
if os.path.isdir(f"/data/catfim/{current_jobs[job_name]['nice_name']}"): shutil.move(f"/data/catfim/{current_jobs[job_name]['nice_name']}", f"{destination}/catfim")

if os.path.isdir(outputs_path):
shutil.rmtree(outputs_path)
@@ -349,7 +353,7 @@ def update_loop():
pass

current_jobs[job_name]['status'] = 'Completed' if current_jobs[job_name]['exit_code'] == 0 else 'Error'

shared_data['current_saving_job'] = ''
current_jobs[job_name]['is_actively_saving'] = False
print(f"{job_name} completed")
@@ -383,7 +387,7 @@ def update_loop():
if shared_data['connected']: sio.emit('update', {'jobUpdates': job_updates, 'presetsList': presets_list})
with open('/data/outputs/current_jobs.json.temp', 'w') as f:
json.dump(current_jobs, f)
shutil.move('/data/outputs/current_jobs.json.temp', '/data/outputs/current_jobs.json')
shutil.move('/data/outputs/current_jobs.json.temp', '/data/outputs/current_jobs.json')

sio = socketio.Client()

@@ -523,4 +527,4 @@ def ws_file_saved(data):
current_jobs[job_name]['status'] = 'Ready to Save File'

sio.connect('http://fim_node_connector:6000/')
update_loop()
update_loop()
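
In the "release" branch of update_loop() shown above, the updater moves the finished run into /data/previous_fim and then registers every file under that tree so the save stage will transmit it to the output handler. A condensed sketch of that registration step follows; the helper function name is invented, while the paths and the chunk-index-of-zero convention come from the diff above.

```python
import os


def register_release_outputs(job, nice_name):
    """Queue every file of an archived release run for the output handler."""
    release_dir = f"/data/previous_fim/{nice_name}"
    for path, _folders, files in os.walk(release_dir):
        for file_name in files:
            # A value of 0 means no chunks of this file have been sent yet.
            job['output_files_saved'][os.path.join(path, file_name)] = 0
    job['total_output_files_length'] = len(job['output_files_saved'])
    job['status'] = 'Ready for Synthesize Test Cases'
```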
2 changes: 1 addition & 1 deletion config/params_template.env
@@ -7,7 +7,7 @@ export wbd_buffer=5000
export thalweg_lateral_elev_threshold=3

#### geospatial parameters ####
export max_split_distance_meters=1500
export max_split_distance_meters=2000
export ms_buffer_dist=7000
export lakes_buffer_dist_meters=20

17 changes: 17 additions & 0 deletions CHANGELOG.md → docs/CHANGELOG.md
@@ -2,6 +2,23 @@ All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
<br/><br/>

## v3.0.21.0 - 2021-08-18 - [PR #433](https://github.com/NOAA-OWP/cahaba/pull/433)

General repository cleanup: memory profiling is now an optional flag, and the API's release feature now saves its outputs.

## Changes
- Remove `Dockerfile.prod`, rename `Dockerfile.dev` to just `Dockerfile`, and remove `.dockerignore`.
- Clean up `Dockerfile` and remove any unused* packages or variables.
- Remove any unused* Python packages from the `Pipfile`.
- Move the `CHANGELOG.md`, `SECURITY.md`, and `TERMS.md` files to the `/docs` folder.
- Remove any unused* scripts in the `/tools` and `/src` folders.
- Move `tools/preprocess` scripts into `tools/`.
- Ensure all scripts in the `/src` folder have their code in functions and are being called via a `__main__` function (This will help with implementing memory profiling fully).
- Changed memory-profiling to be an optional flag `-m` for `fim_run.sh`.
- Updated FIM API to save all outputs during a "release" job.

<br/><br/>

## v3.0.20.2 - 2021-08-13 - [PR #443](https://github.com/NOAA-OWP/cahaba/pull/443)

This merge modifies `clip_vectors_to_wbd.py` to check for relevant input data.
SECURITY.md → docs/SECURITY.md: file renamed without changes.
TERMS.md → docs/TERMS.md: file renamed without changes.
5 changes: 5 additions & 0 deletions fim_run.sh
@@ -21,6 +21,7 @@ usage ()
echo ' -w/--whitelist : list of files to save in a production run in addition to final inundation outputs'
echo ' ex: file1.tif,file2.json,file3.csv'
echo ' -v/--viz : compute post-processing on outputs to be used in viz'
echo ' -m/--mem : enable memory profiling'
exit
}

@@ -69,6 +70,9 @@ in
-v|--viz)
viz=1
;;
-m|--mem)
mem=1
;;
*) ;;
esac
shift
@@ -107,6 +111,7 @@ export extent=$extent
export production=$production
export whitelist=$whitelist
export viz=$viz
export mem=$mem
logFile=$outputRunDataDir/logs/summary.log

## Define inputs
14 changes: 8 additions & 6 deletions src/acquire_and_preprocess_inputs.py
@@ -19,26 +19,27 @@
NHD_VECTOR_EXTRACTION_SUFFIX,
PREP_PROJECTION,
WBD_NATIONAL_URL,
FIM_ID,
OVERWRITE_WBD,
OVERWRITE_NHD,
OVERWRITE_ALL)
FIM_ID
)

from utils.shared_functions import (pull_file, run_system_command,
subset_wbd_gpkg, delete_file,
from utils.shared_functions import (pull_file,
run_system_command,
delete_file,
getDriver)

NHDPLUS_VECTORS_DIRNAME = 'nhdplus_vectors'
NHDPLUS_RASTERS_DIRNAME = 'nhdplus_rasters'
NWM_HYDROFABRIC_DIRNAME = 'nwm_hydrofabric'
NWM_FILE_TO_SUBSET_WITH = 'nwm_flows.gpkg'


def subset_wbd_to_nwm_domain(wbd,nwm_file_to_use):

intersecting_indices = [not (gpd.read_file(nwm_file_to_use,mask=b).empty) for b in wbd.geometry]

return(wbd[intersecting_indices])


def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_use,overwrite_wbd,num_workers):
"""
This helper function pulls and unzips Watershed Boundary Dataset (WBD) data. It uses the WBD URL defined by WBD_NATIONAL_URL.
@@ -125,6 +126,7 @@ def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_

return(wbd_directory)


def pull_and_prepare_nwm_hydrofabric(path_to_saved_data_parent_dir, path_to_preinputs_dir,num_workers):
"""
This helper function pulls and unzips NWM hydrofabric data. It uses the NWM hydrofabric URL defined by NWM_HYDROFABRIC_URL.
6 changes: 4 additions & 2 deletions src/add_crosswalk.py
@@ -11,10 +11,11 @@
# sys.path.append('/foss_fim/src')
# sys.path.append('/foss_fim/config')
from bathy_rc_adjust import bathy_rc_lookup
from utils.shared_functions import getDriver
from utils.shared_functions import getDriver, mem_profile
from utils.shared_variables import FIM_ID

@profile

@mem_profile
def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,input_bathy_fileName,output_bathy_fileName,output_bathy_streamorder_fileName,output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode=False):

input_catchments = gpd.read_file(input_catchments_fileName)
@@ -274,6 +275,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f
with open(output_src_json_fileName,'w') as f:
json.dump(output_src_json,f,sort_keys=True,indent=2)


if __name__ == '__main__':

parser = argparse.ArgumentParser(description='Crosswalk for MS/FR networks; calculate synthetic rating curves; update short rating curves')
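
The change above swaps the hard-coded `@profile` decorator for `@mem_profile` imported from `utils.shared_functions`, which pairs with the `mem` variable that `fim_run.sh -m` now exports. The decorator's implementation is not part of this commit view; the following is one plausible sketch, and the environment-variable check and the memory_profiler backend are assumptions rather than confirmed details of the repository.

```python
import os
from functools import wraps


def mem_profile(func):
    """Apply memory profiling only when the run asks for it."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        # Assumed behaviour: fim_run.sh -m exports mem=1; otherwise the
        # decorated function runs unchanged, with no profiling overhead.
        if os.environ.get('mem') == '1':
            from memory_profiler import profile  # assumed profiling backend
            return profile(func)(*args, **kwargs)
        return func(*args, **kwargs)
    return wrapper
```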
3 changes: 3 additions & 0 deletions src/adjust_headwater_streams.py
@@ -159,6 +159,7 @@ def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id):

return nhd_streams, combined_pts


if __name__ == '__main__':

parser = argparse.ArgumentParser(description='adjust headwater stream geometery based on headwater start points')
@@ -173,6 +174,8 @@ def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id):

args = vars(parser.parse_args())

#TODO variables below are not defined

adj_streams_gdf, adj_headwaters_gdf = adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id)

if subset_nhd_streams_fileName is not None: