Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

61 add a simple parameter to handle starting memory settings #64

Merged
21 changes: 11 additions & 10 deletions conf/base.config
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
process {

// The defaults for all processes (without labels)
cpus = { params.max_cpus }
memory = { params.max_memory }
time = { params.max_time }
cpus = 2
memory = 4.GB
time = { params.max_time }

errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
maxRetries = 1
Expand All @@ -18,7 +18,7 @@ process {
withLabel:process_low {
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
memory = { check_max( 4.GB * task.attempt, 'memory' ) }
time = { check_max( 1.h * task.attempt, 'time' ) }
time = { check_max( 2.h * task.attempt, 'time' ) }

errorStrategy = { task.exitStatus in [21,143,137,104,134,139,247] ? 'retry' : 'finish' }
maxRetries = 1
Expand All @@ -35,9 +35,8 @@ process {
// Assemblies will first try to adjust themselves to a parallel execution
// If it is not possible, then it waits to use all the resources allowed
withLabel:process_assembly {
cpus = { if (task.attempt == 1) { check_max( 6 * task.attempt, 'cpus' ) } else { params.max_cpus } }
memory = { if (task.attempt == 1) { check_max( 20.GB * task.attempt, 'memory' ) } else { params.max_memory } }
time = { if (task.attempt == 1) { check_max( 24.h * task.attempt, 'time' ) } else { params.max_time } }
cpus = { if (task.attempt == 1) { check_max( params.start_asm_cpus, 'cpus' ) } else { params.max_cpus } }
memory = { if (task.attempt == 1) { check_max( params.start_asm_mem , 'memory' ) } else { params.max_memory } }

// retry at least once to try it with full resources
errorStrategy = { task.exitStatus in [1,21,143,137,104,134,139,247] ? 'retry' : 'finish' }
Expand All @@ -46,10 +45,12 @@ process {
}

// Quast sometimes can take too long
// First-attempt resources for quast: half of the assembly starting values,
// but never less than 6.GB of memory / 4 cpus.
// NOTE(review): the quast selector below reads params.quast_cpus / params.quast_mem,
// not these local variables — confirm they are wired together as intended.
def quast_mem = ((params.start_asm_mem / 2) > 6.GB) ? (params.start_asm_mem / 2) : 6.GB
// Fixed: the cpu branch previously returned half of start_asm_mem (a memory value)
// instead of half of start_asm_cpus.
def quast_cpus = ((params.start_asm_cpus / 2) > 4 ) ? (params.start_asm_cpus / 2) : 4
withName:quast {
cpus = { if (task.attempt == 1) { check_max( 4 * task.attempt, 'cpus' ) } else { params.max_cpus } }
memory = { if (task.attempt == 1) { check_max( 10.GB * task.attempt, 'memory' ) } else { params.max_memory } }
time = { if (task.attempt == 1) { check_max( 12.h * task.attempt, 'time' ) } else { params.max_time } }
cpus = { if (task.attempt == 1) { check_max( params.quast_cpus, 'cpus' ) } else { params.max_cpus } }
memory = { if (task.attempt == 1) { check_max( params.quast_mem , 'memory' ) } else { params.max_memory } }
time = { if (task.attempt == 1) { check_max( 12.h * task.attempt, 'time' ) } else { params.max_time } }

// retry at least once to try it with full resources
errorStrategy = { task.exitStatus in [21,143,137,104,134,139,247] ? 'retry' : 'finish' }
Expand Down
30 changes: 26 additions & 4 deletions conf/defaults.config
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,32 @@ params {
skip_shasta = false // Nanopore longreads only assemblies
shasta_additional_parameters = null // Must be given as shown in shasta manual. E.g. " --Reads.minReadLength 5000 "

// Max resource options
// Defaults only, expecting to be overwritten
max_memory = '20.GB'
max_cpus = 6

/*
* Resources controlling parameters
*
* Here some parameters that allow the user to better tune the resources used by the pipeline.
*
* The start_asm_{mem,cpus} parameters tell the pipeline how much memory and how many cpus the
* assembly modules and quast should request in the first try. This is essential for bigger genomes
* in order to avoid failing the first try due to lack of resources and then running again
* (automatically) using the maximum values allowed by the max_{memory,cpus} parameters.
*
* The max_memory and max_cpus parameters tell the pipeline the maximum amount of each of
* these resources that is allowed per job. The pipeline starts by requesting less memory and
* fewer cpus than what is defined by these params and, in case the first try fails, it then
* maxes out the job to use the maximum amount you allowed.
*
* The max_time parameter defines how long a single job is allowed to run.
*/

// starting values for the assembly jobs (and quast) to ask for in the very first try
start_asm_mem = 20.GB
start_asm_cpus = 6

// maximum values to be used on automatic second try in case of lack of memory (all jobs)
max_memory = 40.GB
max_cpus = 10
max_time = '40.h'

}
40 changes: 32 additions & 8 deletions docs/assets/defaults.config
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,16 @@ params {

// Select the appropriate shasta config to use for assembly
// Since shasta v0.8 (Oct/2021) this parameter is now mandatory.
// You can check availability at: https://paoloshasta.github.io/shasta/Configurations.html
shasta_config = "Nanopore-Oct2021"

// Tells the pipeline to interpret the long reads as "corrected" long reads.
// This will activate (if available) the options for corrected reads in the
// assemblers: -corrected (in canu), --pacbio-corr|--nano-corr (in flye), etc.
// Be cautious when using this parameter. If your reads are not corrected, and
// This will activate (if available) the options for corrected or even high
// quality (hq) reads in the assemblers.
// Be cautious when using this parameter. If your reads are not corrected|hq, and
// you use this parameter, you will probably not generate any contigs.
corrected_long_reads = false
corrected_longreads = false
high_quality_longreads = false

// This parameter below (hybrid_strategy) is to select the hybrid strategies adopted by the pipeline.
// Read the documentation https://mpgap.readthedocs.io/en/latest/manual.html to know more about the hybrid strategies.
Expand Down Expand Up @@ -132,10 +134,32 @@ params {
skip_shasta = false // Nanopore longreads only assemblies
shasta_additional_parameters = null // Must be given as shown in shasta manual. E.g. " --Reads.minReadLength 5000 "

// Max resource options
// Defaults only, expecting to be overwritten
max_memory = '14.GB'
max_cpus = 6

/*
* Resources controlling parameters
*
* Here some parameters that allow the user to better tune the resources used by the pipeline.
*
* The start_asm_{mem,cpus} parameters tell the pipeline how much memory and how many cpus the
* assembly modules and quast should request in the first try. This is essential for bigger genomes
* in order to avoid failing the first try due to lack of resources and then running again
* (automatically) using the maximum values allowed by the max_{memory,cpus} parameters.
*
* The max_memory and max_cpus parameters tell the pipeline the maximum amount of each of
* these resources that is allowed per job. The pipeline starts by requesting less memory and
* fewer cpus than what is defined by these params and, in case the first try fails, it then
* maxes out the job to use the maximum amount you allowed.
*
* The max_time parameter defines how long a single job is allowed to run.
*/

// starting values for the assembly jobs (and quast) to ask for in the very first try
start_asm_mem = 20.GB
start_asm_cpus = 6

// maximum values to be used on automatic second try in case of lack of memory (all jobs)
max_memory = 40.GB
max_cpus = 10
max_time = '40.h'

}
10 changes: 6 additions & 4 deletions docs/manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,15 @@ Please note that, through the command line, the parameters that are boolean (tru
| `--input` | :material-check: | NA | Path to input [samplesheet](samplesheet.md#) in YAML format |
| `--output` | :material-check: | NA | Directory to store output files |

## Max job request
## Start/Max resources on job request

| <div style="width:120px">Parameter</div> | Required | Default | Description |
| :--------------------------------------- | :------- | :------ | :---------- |
| `--max_cpus` | :material-close: | 4 | Max number of threads a job can use across attempts |
| `--max_memory` | :material-close: | 6.GB | Max amount of memory a job can use across attempts |
| `--max_time` | :material-close: | 40.h | Max amount of time a job can take to run |
| `--start_asm_cpus` | :material-close: | 6 | Number of cpus an assembly job should request in its very first attempt. This is essential for bigger genomes in order to avoid failing the first try due to lack of resources and then running again (automatically) using the maximum allowed by the `--max_cpus` parameter. |
| `--start_asm_mem` | :material-close: | 20.GB | Amount of memory an assembly job should request in its very first attempt. This is essential for bigger genomes in order to avoid failing the first try due to lack of memory and then running again (automatically) using the maximum allowed by the `--max_memory` parameter. |
| `--max_cpus` | :material-close: | 10 | Max number of threads a job can use across attempts. After one failed attempt this is maxed out. |
| `--max_memory` | :material-close: | 40.GB | Max amount of memory a job can use across attempts. After one failed attempt this is maxed out. |
| `--max_time` | :material-close: | 40.h | Max amount of time a job can take to run |

## Assemblies configuration

Expand Down
1 change: 1 addition & 0 deletions markdown/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The tracking for changes started in v2.
* Increase default `--max_memory` value to 20.GB.
* Add a directory called `final_assemblies` in the main output directory holding all the assemblies generated in the pipeline execution.
* Updated documentation as discussed in [[#58](https://github.com/fmalmeida/MpGAP/issues/58)] and [[#57](https://github.com/fmalmeida/MpGAP/issues/57)].
* [[#61](https://github.com/fmalmeida/MpGAP/issues/61)] - Add a simple parameter to adjust how many cpus and how much memory should the assembly jobs request in the first attempt to avoid lack of resources errors.

## v3.1.4 -- [2022-Sep-03]

Expand Down
2 changes: 1 addition & 1 deletion modules/QualityAssessment/multiqc.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@
Maintained by Felipe Marques de Almeida
Contact: [email protected]
*/
// Load base.config (contains some label resources configuration)
includeConfig 'conf/base.config'

// loading required / default pipeline parameters
includeConfig 'conf/defaults.config'
// Load base.config (contains some label resources configuration)
includeConfig 'conf/base.config'
// fix type of variable expected
params.hybrid_strategy = params.hybrid_strategy.toString()

Expand Down
14 changes: 12 additions & 2 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,24 @@
"description": "Set the top limit of resources for pipeline",
"help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.",
"properties": {
"max_cpus": {
"start_asm_cpus": {
"type": "integer",
"default": 6,
"description": "Starting (1st try) number of cpus that assembly jobs should use. Essential for avoiding 1st-try errors due to lack of resources for big genomes."
},
"start_asm_mem": {
"type": "string",
"default": "20.GB",
"description": "Starting (1st try) amount of memory that assembly jobs should use. Essential for avoiding 1st-try errors due to lack of resources for big genomes."
},
"max_cpus": {
"type": "integer",
"default": 10,
"description": "Max amount of threads to use"
},
"max_memory": {
"type": "string",
"default": "20.GB",
"default": "40.GB",
"description": "Max amount of memory to use"
},
"max_time": {
Expand Down
Loading