diff --git a/.markdownlint.yaml b/.markdownlint.yaml index c673096..1c1d276 100644 --- a/.markdownlint.yaml +++ b/.markdownlint.yaml @@ -1,3 +1,6 @@ default: true -MD024: # no-duplicate-heading/no-duplicate-header - allow_different_nesting: true +no-duplicate-heading: + # Check for duplicate headings only amongst sibling headings. Allow duplicate + # headings in separate sections, which is common in CHANGELOGs (e.g., Fixed, + # Added, Changed). + siblings_only: true diff --git a/CHANGELOG.md b/CHANGELOG.md index 09c1b47..b9ba24f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ The format is based on [Keep a Changelog], and this project adheres to ## Unreleased +## 0.7.0 (2024-04-23) + ### Added - [#57](https://github.com/MAAP-Project/gedi-subsetter/issues/57) Users may @@ -16,10 +18,10 @@ The format is based on [Keep a Changelog], and this project adheres to download failures are now retried up to 10 times to reduce the likelihood that subsetting will fail due to a download failure. - [#56](https://github.com/MAAP-Project/gedi-subsetter/issues/56) The - `bin/subset` script now captures output to `stderr` and writes it to the log - file named `gedi-subset.log`. When a job succeeds, the log file will appear - in the job's output directory. Otherwise, it will appear in the jobs triage - directory. + `bin/subset.sh` script now captures output to `stderr` and writes it to the + log file named `gedi-subset.log`. When a job succeeds, the log file will + appear in the job's output directory. Otherwise, it will appear in the job's + triage directory. - [#65](https://github.com/MAAP-Project/gedi-subsetter/issues/65) All supported GEDI collections are now cloud-hosted, and granules are now downloaded from the cloud rather than from DAAC servers.
diff --git a/algorithm_config.yaml b/algorithm_config.yaml index d167ec2..7869886 100644 --- a/algorithm_config.yaml +++ b/algorithm_config.yaml @@ -1,12 +1,12 @@ algorithm_description: Subset GEDI L1B, L2A, L2B, or L4A granules within an area of interest (AOI) algorithm_name: gedi-subset -algorithm_version: 0.6.2 +algorithm_version: 0.7.0 repository_url: https://github.com/MAAP-Project/gedi-subsetter.git -docker_container_url: mas.maap-project.org/root/maap-workspaces/base_images/vanilla:v3.1.4 +docker_container_url: mas.maap-project.org/root/maap-workspaces/base_images/vanilla:v3.1.5 disk_space: 20GB queue: maap-dps-worker-32gb build_command: gedi-subsetter/bin/build-dps -run_command: gedi-subsetter/bin/subset +run_command: gedi-subsetter/bin/subset.sh inputs: config: [] file: diff --git a/bin/install b/bin/install index 381dc61..d6ac353 100755 --- a/bin/install +++ b/bin/install @@ -31,7 +31,7 @@ done # PIP_REQUIRE_VENV=0 to avoid complaints about installing packages outside of a # virtual environment. PIP_REQUIRE_VENV=0 "${run}" conda lock install "${conda_lock_args[@]}" \ - --name "${conda_env_name}" + --name "${conda_env_name}" "${basedir}/conda-lock.yml" # pip install gedi-subsetter in editable mode. PIP_REQUIRE_VENV=0 "${run}" python -m pip install -e "${basedir}" --no-deps diff --git a/bin/run b/bin/run index eb2db05..5116899 100755 --- a/bin/run +++ b/bin/run @@ -27,6 +27,13 @@ if ! "${conda}" env list | grep -q gedi_subset; then exit 1 fi +# We must set the MAAP_CONF environment variable to point to the preinstalled +# maap-py package. This is necessary because MAAP_CONF is no longer set +# automatically in the Docker container (as it was in the past), and the changes +# I had previously made to maap-py to read maap.cfg as a bundled resource file +# were reverted. 
+export MAAP_CONF=/maap-py/ + [[ -n "${quiet}" ]] || set -x if [[ "${CI:-}" == "true" ]]; then diff --git a/bin/subset b/bin/subset.sh similarity index 90% rename from bin/subset rename to bin/subset.sh index 8b161c1..eb4d5d8 100755 --- a/bin/subset +++ b/bin/subset.sh @@ -2,9 +2,9 @@ set -euo pipefail -# Apply dirname twice to get to the top of the repo, since this script is in the -# `bin` directory (i.e., first dirname gets to `bin`, second gets to the top). -base_dir=$(dirname "$(dirname "$(readlink -f "$0")")") +bin=$(dirname "$(readlink -f "$0")") +base_dir=$(dirname "${bin}") +run="${bin}/run" input_dir="${PWD}/input" output_dir="${PWD}/output" @@ -18,6 +18,10 @@ if ! test -d "${input_dir}"; then # within it. command=("${subset_py}" --verbose "$@") else + echo "--- >>> ${input_dir} ---" >&2 + ls -l "${input_dir}" >&2 + echo "--- <<< ${input_dir} ---" >&2 + # There is an `input` sub-directory of the current working directory, so # assume the AOI file is the sole file within the `input` sub-directory. aoi="$(ls "${input_dir}"/*)" @@ -72,7 +76,8 @@ mkdir -p "${output_dir}" # to write directly to a file because it is a tricky feat to coordinate logging # from multiple processes into a single file. logfile="${PWD}/gedi-subset.log" -"${CONDA_EXE:-conda}" run --no-capture-output --name gedi_subset "${command[@]}" 2>"${logfile}" + +"${run}" "${command[@]}" 2>"${logfile}" # If we get here, the command above succeeded (otherwise this script would have # exited with a non-zero status). We can now move the log file to the output