From a7b03baf9c31137a75084adae48209c5625481c2 Mon Sep 17 00:00:00 2001
From: Peter Belmann <pbelmann@cebitec.uni-bielefeld.de>
Date: Sun, 22 Jan 2023 22:08:34 +0000
Subject: [PATCH 1/5] fix(qc): restrict RAM usage

---
 default/fullPipeline_illumina_nanpore.yml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/default/fullPipeline_illumina_nanpore.yml b/default/fullPipeline_illumina_nanpore.yml
index c3bd7d02..051f47dc 100644
--- a/default/fullPipeline_illumina_nanpore.yml
+++ b/default/fullPipeline_illumina_nanpore.yml
@@ -27,11 +27,17 @@ steps:
        # --length_required  reads shorter than length_required will be discarded, default is 15. (int [=15])
        # PE data, the front/tail trimming settings are given with -f, --trim_front1 and -t, --trim_tail1
        additionalParams: " --detect_adapter_for_pe -q 20 --cut_front --trim_front1 3 --cut_tail --trim_tail1 3 --cut_mean_quality 10 --length_required 50 "
+       timeLimit: "AUTO"
     nonpareil:
       additionalParams: " -v 10 -r 1234 "
     jellyfish:
       additionalParams:
-        count: " -m 21 -s 100M "
+        # --counter-len is the counter length in bits.
+        # -s is the size of the hash
+        # -m k-mer length
+        # -m, --counter-len and -s determine the peak RAM usage, which can be checked by using jellyfish mem.
+        # --disk writes intermediate results to disk
+        count: " -m 21 --counter-len 9 -s 30G --disk  "
         histo: " "
 
   qcONT:

From 79a19abbfbf203f17d2e6afde922ccd1325d5893 Mon Sep 17 00:00:00 2001
From: Peter Belmann <pbelmann@cebitec.uni-bielefeld.de>
Date: Sun, 22 Jan 2023 22:10:51 +0000
Subject: [PATCH 2/5] fix(assembly): allow unpaired reads when running
 independent assembly module

---
 docs/modules/assembly.md               |  2 +-
 example_params/assembly.yml            |  4 +++-
 modules/assembly/shortReadAssembler.nf | 27 ++++++++++++++++++++++----
 test_data/assembly/samplesUnpaired.tsv |  2 ++
 4 files changed, 29 insertions(+), 6 deletions(-)
 create mode 100644 test_data/assembly/samplesUnpaired.tsv

diff --git a/docs/modules/assembly.md b/docs/modules/assembly.md
index de0addb5..269a4188 100644
--- a/docs/modules/assembly.md
+++ b/docs/modules/assembly.md
@@ -2,7 +2,7 @@
 
 ## Input
 
-=== "Command for short read data"
+=== "Command for short read data with optional single end reads"
 
     ```
     -entry wShortReadAssembly -params-file example_params/assembly.yml
diff --git a/example_params/assembly.yml b/example_params/assembly.yml
index a54260da..87fe46f9 100644
--- a/example_params/assembly.yml
+++ b/example_params/assembly.yml
@@ -8,7 +8,9 @@ scratch: "/vol/scratch"
 publishDirMode: "symlink"
 steps:
   assembly:
-    input: test_data/assembly/samples.tsv 
+    input:
+      paired: test_data/assembly/samples.tsv
+      single: test_data/assembly/samplesUnpaired.tsv
     megahit:
       additionalParams: " --min-contig-len 200 "
       fastg: true
diff --git a/modules/assembly/shortReadAssembler.nf b/modules/assembly/shortReadAssembler.nf
index 1b6cd6bd..7ff28bfe 100644
--- a/modules/assembly/shortReadAssembler.nf
+++ b/modules/assembly/shortReadAssembler.nf
@@ -170,8 +170,9 @@ workflow wShortReadAssemblyList {
 
 
 /*
- * Takes a tab separated file of files containing reads as input and produces assembly results.
- * Input file with columns seperated by tabs:
+ * Takes two tab separated files of files containing paired and optional single reads
+ * as input and produces assembly results.
+ * Input files must have two columns separated by tabs:
  * SAMPLE and READS
  *
  * Output is of the format [SAMPLE, CONTIGS]
@@ -179,8 +180,26 @@ workflow wShortReadAssemblyList {
  */
 workflow wShortReadAssemblyFile {
     main:
-       Channel.from(file(params.steps.assembly.input)) | splitCsv(sep: '\t', header: true) \
-             | map { it -> [ it.SAMPLE, it.READS, file("NOT_SET")]} | set { reads  }
+       SAMPLE_IDX = 0       
+       SAMPLE_PAIRED_IDX = 1
+       UNPAIRED_IDX = 2
+
+       readsPaired = Channel.empty()
+       if(params.steps.assembly.input.containsKey("paired")) {
+       	 Channel.from(file(params.steps.assembly.input.paired)) | splitCsv(sep: '\t', header: true) \
+             | map { it -> [ it.SAMPLE, it.READS]} | set { readsPaired  }
+       }
+
+       readsSingle = Channel.empty()
+       if(params.steps.assembly.input.containsKey("single")) {
+         Channel.from(file(params.steps.assembly.input.single)) | splitCsv(sep: '\t', header: true) \
+             | map { it -> [ it.SAMPLE, it.READS]} | set { readsSingle  }
+       }
+
+       readsPaired | join(readsSingle, by: SAMPLE_IDX, remainder: true) \
+	| map { sample -> sample[UNPAIRED_IDX] == null ? \
+		[sample[SAMPLE_IDX], sample[SAMPLE_PAIRED_IDX], file("NOT_SET")] : sample } \
+	| view | set { reads }
 
        _wAssembly(reads, Channel.empty(), Channel.empty())
     emit:
diff --git a/test_data/assembly/samplesUnpaired.tsv b/test_data/assembly/samplesUnpaired.tsv
new file mode 100644
index 00000000..b7f63a61
--- /dev/null
+++ b/test_data/assembly/samplesUnpaired.tsv
@@ -0,0 +1,2 @@
+SAMPLE	READS
+test1	https://openstack.cebitec.uni-bielefeld.de:8080/swift/v1/meta_test/small/unpaired.fq.gz

From d79c7245af2d51e43e0fdf4ef1a9c17b9595f0e4 Mon Sep 17 00:00:00 2001
From: Peter Belmann <pbelmann@cebitec.uni-bielefeld.de>
Date: Sun, 22 Jan 2023 22:29:11 +0000
Subject: [PATCH 3/5] feat(assembly): add timestamp suffix to predicted RAM
 output tsv

---
 modules/assembly/shortReadAssembler.nf | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/modules/assembly/shortReadAssembler.nf b/modules/assembly/shortReadAssembler.nf
index 7ff28bfe..1f08c23a 100644
--- a/modules/assembly/shortReadAssembler.nf
+++ b/modules/assembly/shortReadAssembler.nf
@@ -11,6 +11,7 @@ def getOutput(SAMPLE, RUNID, TOOL, filename){
           '/' + TOOL + '/' + filename
 }
 
+def timestamp = new java.util.Date().format('yyyyMMdd-HHmmss-SSS') // run-start timestamp used as a file name suffix; 'yyyy' is the calendar year ('YYYY' would be the week-based year, which is wrong around New Year)
 
 /*
 * This process uses kmer frequencies and the nonpareil diversity index to predict peak memory consumption on an assembler.
@@ -199,7 +200,7 @@ workflow wShortReadAssemblyFile {
        readsPaired | join(readsSingle, by: SAMPLE_IDX, remainder: true) \
 	| map { sample -> sample[UNPAIRED_IDX] == null ? \
 		[sample[SAMPLE_IDX], sample[SAMPLE_PAIRED_IDX], file("NOT_SET")] : sample } \
-	| view | set { reads }
+	| set { reads }
 
        _wAssembly(reads, Channel.empty(), Channel.empty())
     emit:
@@ -306,9 +307,10 @@ workflow _wCalculateMegahitResources {
           | join(kmerFrequencies) | pPredictFlavor
 
          PREDICTED_RAM_IDX = 1
+
          pPredictFlavor.out.memory \
           | collectFile(newLine: true, seed: "SAMPLE\tPREDICTED_RAM", storeDir: params.logDir){ item ->
-        	[ "predictedMegahitRAM.tsv", item[SAMPLE_IDX] + '\t' + item[PREDICTED_RAM_IDX]  ]
+        	[ "predictedMegahitRAM." + timestamp + ".tsv", item[SAMPLE_IDX] + '\t' + item[PREDICTED_RAM_IDX]  ]
     	  }
 
          resourceType.doNotPredict | map{ it -> it + "NoPrediction" } \

From b573e2ea8042fa38c78e5b19fa7bcb2ec513ee19 Mon Sep 17 00:00:00 2001
From: Peter Belmann <pbelmann@cebitec.uni-bielefeld.de>
Date: Mon, 23 Jan 2023 08:04:35 +0000
Subject: [PATCH 4/5] fix(annotation): add quotes for additional params
 variable

---
 modules/annotation/module.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/annotation/module.nf b/modules/annotation/module.nf
index 9ee99493..1e694c6e 100644
--- a/modules/annotation/module.nf
+++ b/modules/annotation/module.nf
@@ -236,7 +236,7 @@ process pResistanceGeneIdentifier {
    S5CMD_PARAMS=params.steps?.annotation?.rgi?.database?.download?.s5cmd?.params ?: ""
    '''
    mkdir -p !{params.polished.databases}
-   ADDITIONAL_RGI_PARAMS=!{params.steps?.annotation?.rgi?.additionalParams}
+   ADDITIONAL_RGI_PARAMS="!{params.steps?.annotation?.rgi?.additionalParams}"
 
    # Check developer documentation
    CARD_JSON=""

From 72a39988f338ea7f95f10e5ee556b9937c90c3cd Mon Sep 17 00:00:00 2001
From: Peter Belmann <pbelmann@cebitec.uni-bielefeld.de>
Date: Mon, 23 Jan 2023 22:55:55 +0000
Subject: [PATCH 5/5] fix(assembly): adjust metaspades yaml

---
 example_params/assemblyMetaspades.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/example_params/assemblyMetaspades.yml b/example_params/assemblyMetaspades.yml
index 22f8796d..56ea647c 100644
--- a/example_params/assemblyMetaspades.yml
+++ b/example_params/assemblyMetaspades.yml
@@ -8,7 +8,8 @@ scratch: "/vol/scratch"
 publishDirMode: "symlink"
 steps:
   assembly:
-    input: test_data/assembly/samples.tsv 
+    input: 
+      paired: test_data/assembly/samples.tsv 
     metaspades:
       additionalParams: "  "
       fastg: true