From a7b03baf9c31137a75084adae48209c5625481c2 Mon Sep 17 00:00:00 2001 From: Peter Belmann Date: Sun, 22 Jan 2023 22:08:34 +0000 Subject: [PATCH 1/5] fix(qc): restrict RAM usage --- default/fullPipeline_illumina_nanpore.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/default/fullPipeline_illumina_nanpore.yml b/default/fullPipeline_illumina_nanpore.yml index c3bd7d02..051f47dc 100644 --- a/default/fullPipeline_illumina_nanpore.yml +++ b/default/fullPipeline_illumina_nanpore.yml @@ -27,11 +27,17 @@ steps: # --length_required reads shorter than length_required will be discarded, default is 15. (int [=15]) # PE data, the front/tail trimming settings are given with -f, --trim_front1 and -t, --trim_tail1 additionalParams: " --detect_adapter_for_pe -q 20 --cut_front --trim_front1 3 --cut_tail --trim_tail1 3 --cut_mean_quality 10 --length_required 50 " + timeLimit: "AUTO" nonpareil: additionalParams: " -v 10 -r 1234 " jellyfish: additionalParams: - count: " -m 21 -s 100M " + # --counter-len is the counter length in bits. + # -s is the size of the hash + # -m is the k-mer length + # -m, --counter-len and -s determine the peak RAM usage, which can be checked by using jellyfish mem. 
+ # --disk writes intermediate results to disk + count: " -m 21 --counter-len 9 -s 30G --disk " histo: " " qcONT: From 79a19abbfbf203f17d2e6afde922ccd1325d5893 Mon Sep 17 00:00:00 2001 From: Peter Belmann Date: Sun, 22 Jan 2023 22:10:51 +0000 Subject: [PATCH 2/5] fix(assembly): allow unpaired reads when running independent assembly module --- docs/modules/assembly.md | 2 +- example_params/assembly.yml | 4 +++- modules/assembly/shortReadAssembler.nf | 27 ++++++++++++++++++++++---- test_data/assembly/samplesUnpaired.tsv | 2 ++ 4 files changed, 29 insertions(+), 6 deletions(-) create mode 100644 test_data/assembly/samplesUnpaired.tsv diff --git a/docs/modules/assembly.md b/docs/modules/assembly.md index de0addb5..269a4188 100644 --- a/docs/modules/assembly.md +++ b/docs/modules/assembly.md @@ -2,7 +2,7 @@ ## Input -=== "Command for short read data" +=== "Command for short read data with optional single end reads" ``` -entry wShortReadAssembly -params-file example_params/assembly.yml diff --git a/example_params/assembly.yml b/example_params/assembly.yml index a54260da..87fe46f9 100644 --- a/example_params/assembly.yml +++ b/example_params/assembly.yml @@ -8,7 +8,9 @@ scratch: "/vol/scratch" publishDirMode: "symlink" steps: assembly: - input: test_data/assembly/samples.tsv + input: + paired: test_data/assembly/samples.tsv + single: test_data/assembly/samplesUnpaired.tsv megahit: additionalParams: " --min-contig-len 200 " fastg: true diff --git a/modules/assembly/shortReadAssembler.nf b/modules/assembly/shortReadAssembler.nf index 1b6cd6bd..7ff28bfe 100644 --- a/modules/assembly/shortReadAssembler.nf +++ b/modules/assembly/shortReadAssembler.nf @@ -170,8 +170,9 @@ workflow wShortReadAssemblyList { /* - * Takes a tab separated file of files containing reads as input and produces assembly results. - * Input file with columns seperated by tabs: + * Takes two tab separated files of files containing paired and optional single reads + * as input and produces assembly results. 
+ * Input files must have two columns separated by tabs: * SAMPLE and READS * * Output is of the format [SAMPLE, CONTIGS] @@ -179,8 +180,26 @@ workflow wShortReadAssemblyList { */ workflow wShortReadAssemblyFile { main: - Channel.from(file(params.steps.assembly.input)) | splitCsv(sep: '\t', header: true) \ - | map { it -> [ it.SAMPLE, it.READS, file("NOT_SET")]} | set { reads } + SAMPLE_IDX = 0 + SAMPLE_PAIRED_IDX = 1 + UNPAIRED_IDX = 2 + + readsPaired = Channel.empty() + if(params.steps.assembly.input.containsKey("paired")) { + Channel.from(file(params.steps.assembly.input.paired)) | splitCsv(sep: '\t', header: true) \ + | map { it -> [ it.SAMPLE, it.READS]} | set { readsPaired } + } + + readsSingle = Channel.empty() + if(params.steps.assembly.input.containsKey("single")) { + Channel.from(file(params.steps.assembly.input.single)) | splitCsv(sep: '\t', header: true) \ + | map { it -> [ it.SAMPLE, it.READS]} | set { readsSingle } + } + + readsPaired | join(readsSingle, by: SAMPLE_IDX, remainder: true) \ + | map { sample -> sample[UNPAIRED_IDX] == null ? 
\ + [sample[SAMPLE_IDX], sample[SAMPLE_PAIRED_IDX], file("NOT_SET")] : sample } \ + | view | set { reads } _wAssembly(reads, Channel.empty(), Channel.empty()) emit: diff --git a/test_data/assembly/samplesUnpaired.tsv b/test_data/assembly/samplesUnpaired.tsv new file mode 100644 index 00000000..b7f63a61 --- /dev/null +++ b/test_data/assembly/samplesUnpaired.tsv @@ -0,0 +1,2 @@ +SAMPLE READS +test1 https://openstack.cebitec.uni-bielefeld.de:8080/swift/v1/meta_test/small/unpaired.fq.gz From d79c7245af2d51e43e0fdf4ef1a9c17b9595f0e4 Mon Sep 17 00:00:00 2001 From: Peter Belmann Date: Sun, 22 Jan 2023 22:29:11 +0000 Subject: [PATCH 3/5] feat(assembly):add timestamp suffix to predicted RAM output tsv --- modules/assembly/shortReadAssembler.nf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/assembly/shortReadAssembler.nf b/modules/assembly/shortReadAssembler.nf index 7ff28bfe..1f08c23a 100644 --- a/modules/assembly/shortReadAssembler.nf +++ b/modules/assembly/shortReadAssembler.nf @@ -11,6 +11,7 @@ def getOutput(SAMPLE, RUNID, TOOL, filename){ '/' + TOOL + '/' + filename } +def timestamp = new java.util.Date().format( 'yyyyMMdd-HHmmss-SSS') /* 'yyyy' is the calendar year; 'YYYY' would be the week-based year and can be off by one around New Year */ /* * This process uses kmer frequencies and the nonpareil diversity index to predict peak memory consumption on an assembler. @@ -199,7 +200,7 @@ workflow wShortReadAssemblyFile { readsPaired | join(readsSingle, by: SAMPLE_IDX, remainder: true) \ | map { sample -> sample[UNPAIRED_IDX] == null ? 
\ [sample[SAMPLE_IDX], sample[SAMPLE_PAIRED_IDX], file("NOT_SET")] : sample } \ - | view | set { reads } + | set { reads } _wAssembly(reads, Channel.empty(), Channel.empty()) emit: @@ -306,9 +307,10 @@ workflow _wCalculateMegahitResources { | join(kmerFrequencies) | pPredictFlavor PREDICTED_RAM_IDX = 1 + pPredictFlavor.out.memory \ | collectFile(newLine: true, seed: "SAMPLE\tPREDICTED_RAM", storeDir: params.logDir){ item -> - [ "predictedMegahitRAM.tsv", item[SAMPLE_IDX] + '\t' + item[PREDICTED_RAM_IDX] ] + [ "predictedMegahitRAM." + timestamp + ".tsv", item[SAMPLE_IDX] + '\t' + item[PREDICTED_RAM_IDX] ] } resourceType.doNotPredict | map{ it -> it + "NoPrediction" } \ From b573e2ea8042fa38c78e5b19fa7bcb2ec513ee19 Mon Sep 17 00:00:00 2001 From: Peter Belmann Date: Mon, 23 Jan 2023 08:04:35 +0000 Subject: [PATCH 4/5] fix(annotation): add quotes for additional params variable --- modules/annotation/module.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/annotation/module.nf b/modules/annotation/module.nf index 9ee99493..1e694c6e 100644 --- a/modules/annotation/module.nf +++ b/modules/annotation/module.nf @@ -236,7 +236,7 @@ process pResistanceGeneIdentifier { S5CMD_PARAMS=params.steps?.annotation?.rgi?.database?.download?.s5cmd?.params ?: "" ''' mkdir -p !{params.polished.databases} - ADDITIONAL_RGI_PARAMS=!{params.steps?.annotation?.rgi?.additionalParams} + ADDITIONAL_RGI_PARAMS="!{params.steps?.annotation?.rgi?.additionalParams}" # Check developer documentation CARD_JSON="" From 72a39988f338ea7f95f10e5ee556b9937c90c3cd Mon Sep 17 00:00:00 2001 From: Peter Belmann Date: Mon, 23 Jan 2023 22:55:55 +0000 Subject: [PATCH 5/5] fix(assembly): adjust metaspades yaml --- example_params/assemblyMetaspades.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/example_params/assemblyMetaspades.yml b/example_params/assemblyMetaspades.yml index 22f8796d..56ea647c 100644 --- a/example_params/assemblyMetaspades.yml +++ 
b/example_params/assemblyMetaspades.yml @@ -8,7 +8,8 @@ scratch: "/vol/scratch" publishDirMode: "symlink" steps: assembly: - input: test_data/assembly/samples.tsv + input: + paired: test_data/assembly/samples.tsv metaspades: additionalParams: " " fastg: true