Merge pull request #535 from georgiesamaha/nci_gadi
Add NCI Gadi HPC config
georgiesamaha authored Aug 13, 2023
2 parents 1d76683 + 5edad82 commit 5d15daa
Showing 5 changed files with 119 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/workflows/main.yml
@@ -90,6 +90,7 @@ jobs:
- "mjolnir_globe"
- "mpcdf"
- "munin"
- "nci_gadi"
- "nu_genomics"
- "oist"
- "pasteur"
1 change: 1 addition & 0 deletions README.md
@@ -146,6 +146,7 @@ Currently documentation is available for the following systems:
- [MJOLNIR_GLOBE](docs/mjolnir_globe.md)
- [MPCDF](docs/mpcdf.md)
- [MUNIN](docs/munin.md)
- [NCI GADI](docs/nci_gadi.md)
- [NU_GENOMICS](docs/nu_genomics.md)
- [OIST](docs/oist.md)
- [PASTEUR](docs/pasteur.md)
44 changes: 44 additions & 0 deletions conf/nci_gadi.config
@@ -0,0 +1,44 @@
// NCI Gadi nf-core configuration profile
params {
    config_profile_description = 'NCI Gadi HPC profile provided by nf-core/configs'
    config_profile_contact = 'Georgie Samaha (@georgiesamaha), Matthew Downton (@mattdton)'
    config_profile_url = 'https://opus.nci.org.au/display/Help/Gadi+User+Guide'
    project = System.getenv("PROJECT")
}

// Enable use of Singularity to run containers
singularity {
    enabled = true
    autoMounts = true
}

// Submit up to 300 concurrent jobs (Gadi executor maximum)
// pollInterval and queueStatInterval of every 5 minutes
// submitRateLimit of 20 jobs per minute
executor {
    queueSize = 300
    pollInterval = '5 min'
    queueStatInterval = '5 min'
    submitRateLimit = '20 min'
}

// Define process resource limits
process {
    executor = 'pbspro'
    project = System.getenv("PROJECT")
    // PBS storage directive, interpolating the project code, e.g. scratch/aa00
    storage = "scratch/${params.project}"
    module = 'singularity'
    cache = 'lenient'
    stageInMode = 'symlink'
    // Select the queue based on the memory requested by each task
    queue = { task.memory < 128.GB ? 'normalbw' : (task.memory >= 128.GB && task.memory <= 190.GB ? 'normal' : (task.memory > 190.GB && task.memory <= 1020.GB ? 'hugemembw' : '')) }
    beforeScript = 'module load singularity'
}

// Write custom trace file with outputs required for SU calculation
def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')
trace {
    enabled = true
    overwrite = false
    file = "./gadi-nf-core-trace-${trace_timestamp}.txt"
    fields = 'name,status,exit,duration,realtime,cpus,%cpu,memory,%mem,rss'
}
72 changes: 72 additions & 0 deletions docs/nci_gadi.md
@@ -0,0 +1,72 @@
# nf-core/configs: NCI Gadi HPC Configuration

nf-core pipelines have been successfully configured for use on the [Gadi HPC](https://opus.nci.org.au/display/Help/Gadi+User+Guide) at the National Computational Infrastructure (NCI), Canberra, Australia.

To run an nf-core pipeline on NCI Gadi, launch it with `-profile singularity,nci_gadi`. This will download and apply the [`nci_gadi.config`](../conf/nci_gadi.config), which has been pre-configured for the NCI Gadi HPC cluster. Using this profile, a Docker image containing all of the required software will be downloaded and converted to a Singularity image before the pipeline is executed.

## Access to NCI Gadi

Please be aware that you will need a user account, membership of a Gadi project, and a service unit allocation for that project in order to use this infrastructure. See the [NCI user guide](https://opus.nci.org.au/display/Help/Getting+Started+at+NCI) for details on getting access to Gadi.

## Launch an nf-core pipeline on Gadi

### Prerequisites

Before running the pipeline you will need to load Nextflow and Singularity, both of which are globally installed modules on Gadi. You can do this by running the commands below:

```bash
module purge
module load nextflow singularity
```

### Execution command

```bash
module load nextflow
module load singularity

nextflow run <nf-core_pipeline>/main.nf \
-profile singularity,nci_gadi \
<additional flags>
```

### Cluster considerations

Please be aware that as of July 2023, NCI Gadi HPC queues **do not** have external network access. This means you will not be able to pull the workflow code base or containers if you submit your `nextflow run` command as a job on any of the standard job queues. NCI currently recommends you run your Nextflow head job either in a GNU screen or tmux session from the login node or submit it as a job to the [copyq](https://opus.nci.org.au/display/Help/Queue+Structure). See the [nf-core documentation](https://nf-co.re/docs/usage/offline) for instructions on running pipelines offline.
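
As an illustration, below is a minimal sketch of a copyq submission script for the Nextflow head job. The project code `aa00`, storage paths, memory, and walltime are placeholders you will need to adapt to your own allocation; check the queue limits page linked below for the current copyq restrictions.

```bash
#!/bin/bash
# Hypothetical head-job script for the copyq -- adjust project, storage,
# memory, and walltime to your own allocation and pipeline.
#PBS -q copyq
#PBS -P aa00
#PBS -l ncpus=1
#PBS -l mem=10GB
#PBS -l walltime=10:00:00
#PBS -l storage=scratch/aa00+gdata/aa00
#PBS -l wd

module purge
module load nextflow singularity

nextflow run <nf-core_pipeline>/main.nf \
  -profile singularity,nci_gadi \
  <additional flags>
```

Submit the script with `qsub` from the directory you want the pipeline to run in.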

This config determines which Gadi queue each task job is submitted to based on the amount of memory requested. For the sake of resource and cost (service unit) efficiency, the following rules are applied:

- Tasks requesting **less than 128 GB** are submitted to the `normalbw` queue
- Tasks requesting **128 GB or more, up to 190 GB** are submitted to the `normal` queue
- Tasks requesting **more than 190 GB, up to 1020 GB** are submitted to the `hugemembw` queue

See the NCI Gadi [queue limit documentation](https://opus.nci.org.au/display/Help/Queue+Limits) for details on charge rates for each queue.

### Project accounting

This config uses the PBS environment variable `$PROJECT` to assign a project code to all task job submissions for billing purposes. If you are a member of multiple Gadi projects, you should confirm which project will be charged for your pipeline execution. You can check your default project with:

```bash
echo $PROJECT
```
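
Because the config reads the project code with `System.getenv("PROJECT")`, one option (a suggestion, not an NCI-documented workflow) is to export a different project code in your session before launching the head job:

```bash
# aa00 is a placeholder -- replace with the project you want charged
export PROJECT=aa00
echo $PROJECT # confirm before launching the pipeline
```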

The version of Nextflow installed on Gadi has been modified to make it easier to specify resource options for jobs submitted to the cluster. See NCI's [Gadi user guide](https://opus.nci.org.au/display/DAE/Nextflow) for more details. You can manually override the `$PROJECT` specification by editing your local copy of the `nci_gadi.config` and replacing the `$PROJECT` lookup with your project code. For example:

```nextflow
process {
    executor = 'pbspro'
    project = 'aa00'
    storage = 'scratch/aa00+gdata/aa00'
    ...
}
```

## Resource usage

The NCI Gadi config summarises resource usage in a custom trace file that will be saved to your execution directory. However, for accounting or resource benchmarking purposes you may need to collect per-task service unit (SU) charges. Upon workflow completion, you can run the Sydney Informatics Hub's [gadi_nfcore_report.sh](https://github.com/Sydney-Informatics-Hub/HPC_usage_reports/blob/master/Scripts/gadi_nfcore_report.sh) script in your workflow execution directory with:

```bash
bash gadi_nfcore_report.sh
```

This script collects resource usage from the PBS logs written to each task's `.command.log` file. Resource requests and usage for each process are summarised in the output `gadi-nf-core-joblogs.tsv` file, which is useful for resource benchmarking and SU accounting.
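
The report script is not pre-installed on Gadi; one way to obtain it is to download the raw file from the repository linked above into your execution directory (the raw URL is assumed from the GitHub path):

```bash
# Download the report script from the Sydney Informatics Hub repository, then run it
wget https://raw.githubusercontent.com/Sydney-Informatics-Hub/HPC_usage_reports/master/Scripts/gadi_nfcore_report.sh
bash gadi_nfcore_report.sh
```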
1 change: 1 addition & 0 deletions nfcore_custom.config
@@ -72,6 +72,7 @@ profiles {
mjolnir_globe { includeConfig "${params.custom_config_base}/conf/mjolnir_globe.config" }
mpcdf { includeConfig "${params.custom_config_base}/conf/mpcdf.config" }
munin { includeConfig "${params.custom_config_base}/conf/munin.config" }
nci_gadi { includeConfig "${params.custom_config_base}/conf/nci_gadi.config" }
nu_genomics { includeConfig "${params.custom_config_base}/conf/nu_genomics.config" }
oist { includeConfig "${params.custom_config_base}/conf/oist.config" }
pasteur { includeConfig "${params.custom_config_base}/conf/pasteur.config" }
