metagenomics · pbelmann · Jan 24, 2023 · Jan 22, 2023 · Jan 22, 2023 · Jan 22, 2023
diff --git a/default/fullPipeline_illumina_nanpore.yml b/default/fullPipeline_illumina_nanpore.yml
@@ -27,11 +27,17 @@ steps:
        # --length_required  reads shorter than length_required will be discarded, default is 15. (int [=15])
        # PE data, the front/tail trimming settings are given with -f, --trim_front1 and -t, --trim_tail1
        additionalParams: " --detect_adapter_for_pe -q 20 --cut_front --trim_front1 3 --cut_tail --trim_tail1 3 --cut_mean_quality 10 --length_required 50 "
+       timeLimit: "AUTO"
     nonpareil:
       additionalParams: " -v 10 -r 1234 "
     jellyfish:
       additionalParams:
-        count: " -m 21 -s 100M "
+        # --counter-len is the counter length in bits.
+        # -s is the size of the hash
+        # -m is the k-mer length
+        # -m, --counter-len and -s determine the peak RAM usage, which can be estimated with `jellyfish mem`.
+        # --disk writes intermediate results to disk
+        count: " -m 21 --counter-len 9 -s 30G --disk  "
         histo: " "
 
   qcONT:

diff --git a/docs/modules/assembly.md b/docs/modules/assembly.md
@@ -2,7 +2,7 @@
 
 ## Input
 
-=== "Command for short read data"
+=== "Command for short read data with optional single end reads"
 
     ```
     -entry wShortReadAssembly -params-file example_params/assembly.yml

diff --git a/example_params/assembly.yml b/example_params/assembly.yml
@@ -8,7 +8,9 @@ scratch: "/vol/scratch"
 publishDirMode: "symlink"
 steps:
   assembly:
-    input: test_data/assembly/samples.tsv 
+    input:
+      paired: test_data/assembly/samples.tsv
+      single: test_data/assembly/samplesUnpaired.tsv
     megahit:
       additionalParams: " --min-contig-len 200 "
       fastg: true

diff --git a/example_params/assemblyMetaspades.yml b/example_params/assemblyMetaspades.yml
@@ -8,7 +8,8 @@ scratch: "/vol/scratch"
 publishDirMode: "symlink"
 steps:
   assembly:
-    input: test_data/assembly/samples.tsv 
+    input: 
+      paired: test_data/assembly/samples.tsv 
     metaspades:
       additionalParams: "  "
       fastg: true

diff --git a/modules/annotation/module.nf b/modules/annotation/module.nf
@@ -236,7 +236,7 @@ process pResistanceGeneIdentifier {
    S5CMD_PARAMS=params.steps?.annotation?.rgi?.database?.download?.s5cmd?.params ?: ""
    '''
    mkdir -p !{params.polished.databases}
-   ADDITIONAL_RGI_PARAMS=!{params.steps?.annotation?.rgi?.additionalParams}
+   ADDITIONAL_RGI_PARAMS="!{params.steps?.annotation?.rgi?.additionalParams}"
 
    # Check developer documentation
    CARD_JSON=""

diff --git a/modules/assembly/shortReadAssembler.nf b/modules/assembly/shortReadAssembler.nf
@@ -11,6 +11,7 @@ def getOutput(SAMPLE, RUNID, TOOL, filename){
           '/' + TOOL + '/' + filename
 }
 
+def timestamp = new java.util.Date().format('yyyyMMdd-HHmmss-SSS') // run timestamp for log file names; 'yyyy' is the calendar year ('YYYY' would be the week-based year and is wrong around New Year)
 
 /*
 * This process uses kmer frequencies and the nonpareil diversity index to predict peak memory consumption on an assembler.
@@ -170,17 +171,36 @@ workflow wShortReadAssemblyList {
 
 
 /*
- * Takes a tab separated file of files containing reads as input and produces assembly results.
- * Input file with columns seperated by tabs:
+ * Takes two tab-separated files of files containing paired and, optionally, single reads
+ * as input and produces assembly results.
+ * Input files must have two columns separated by tabs:
  * SAMPLE and READS
  *
  * Output is of the format [SAMPLE, CONTIGS]
  * 
  */
 workflow wShortReadAssemblyFile {
     main:
-       Channel.from(file(params.steps.assembly.input)) | splitCsv(sep: '\t', header: true) \
-             | map { it -> [ it.SAMPLE, it.READS, file("NOT_SET")]} | set { reads  }
+       SAMPLE_IDX = 0       
+       SAMPLE_PAIRED_IDX = 1
+       UNPAIRED_IDX = 2
+
+       readsPaired = Channel.empty()
+       if(params.steps.assembly.input.containsKey("paired")) {
+       	 Channel.from(file(params.steps.assembly.input.paired)) | splitCsv(sep: '\t', header: true) \
+             | map { it -> [ it.SAMPLE, it.READS]} | set { readsPaired  }
+       }
+
+       readsSingle = Channel.empty()
+       if(params.steps.assembly.input.containsKey("single")) {
+         Channel.from(file(params.steps.assembly.input.single)) | splitCsv(sep: '\t', header: true) \
+             | map { it -> [ it.SAMPLE, it.READS]} | set { readsSingle  }
+       }
+
+       readsPaired | join(readsSingle, by: SAMPLE_IDX, remainder: true) \
+	| map { sample -> sample[UNPAIRED_IDX] == null ? \
+		[sample[SAMPLE_IDX], sample[SAMPLE_PAIRED_IDX], file("NOT_SET")] : sample } \
+	| set { reads }
 
        _wAssembly(reads, Channel.empty(), Channel.empty())
     emit:
@@ -287,9 +307,10 @@ workflow _wCalculateMegahitResources {
           | join(kmerFrequencies) | pPredictFlavor
 
          PREDICTED_RAM_IDX = 1
+
          pPredictFlavor.out.memory \
           | collectFile(newLine: true, seed: "SAMPLE\tPREDICTED_RAM", storeDir: params.logDir){ item ->
-        	[ "predictedMegahitRAM.tsv", item[SAMPLE_IDX] + '\t' + item[PREDICTED_RAM_IDX]  ]
+        	[ "predictedMegahitRAM." + timestamp + ".tsv", item[SAMPLE_IDX] + '\t' + item[PREDICTED_RAM_IDX]  ]
     	  }
 
          resourceType.doNotPredict | map{ it -> it + "NoPrediction" } \

diff --git a/test_data/assembly/samplesUnpaired.tsv b/test_data/assembly/samplesUnpaired.tsv
@@ -0,0 +1,2 @@
+SAMPLE	READS
+test1	https://openstack.cebitec.uni-bielefeld.de:8080/swift/v1/meta_test/small/unpaired.fq.gz
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		SAMPLE READS
		test1 https://openstack.cebitec.uni-bielefeld.de:8080/swift/v1/meta_test/small/unpaired.fq.gz