fix: pipeline was not running starsolo in parallel
matq007 committed Nov 3, 2024
1 parent c2c3848 commit 4176307
Showing 6 changed files with 62 additions and 61 deletions.
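
In outline: the STARsolo whitelist was previously created inside the `STARSOLO` subworkflow as a single-element queue channel (`Channel.fromPath(...)`), which is consumed after the first task and so limits `STARSOLO_ALIGN` to a single run; this commit passes the whitelist as a plain `file(...)` value instead, moves the read/meta mapping into the subworkflow, and threads the wells sheet and STAR index through the workflow `take:` inputs rather than top-level channels. A minimal, self-contained sketch of the queue-channel vs. value pitfall (illustrative only, assumed file names, not code from this repository):

```nextflow
// Illustrative sketch. A single-element queue channel, e.g.
// Channel.fromPath('whitelist.tsv'), would be consumed after the first task
// and ALIGN would run only once; passing a value (the result of file(...))
// lets ALIGN run once per sample, in parallel.
nextflow.enable.dsl = 2

process ALIGN {
    input:
    tuple val(meta), path(reads)
    path whitelist

    output:
    tuple val(meta), path("${meta.id}.out")

    script:
    """
    cat ${whitelist} ${reads} > ${meta.id}.out
    """
}

workflow {
    samples = Channel.of(
        [ [id: 'sample_a'], file('a.fastq') ],
        [ [id: 'sample_b'], file('b.fastq') ]
    )
    whitelist = file('whitelist.tsv')   // value input, reused for every sample
    ALIGN ( samples, whitelist )        // two parallel tasks, one per sample
}
```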
25 changes: 25 additions & 0 deletions README.md
@@ -56,6 +56,31 @@ nextflow run brickmanlab/primeseq \
--outdir output
```

or by creating a Slurm batch script (`align.sbatch`):

```bash
#!/bin/bash

#SBATCH --job-name=align
#SBATCH --mail-type=END,FAIL
#SBATCH --mail-user=NONE
#SBATCH -c 1
#SBATCH --mem=2gb
#SBATCH --time=1-00:00:00
#SBATCH --output=align.log
#SBATCH -w dancmpn02fl

module load openjdk/20.0.0 nextflow/23.04.1.5866 singularity/3.8.0

nextflow run /home/fdb589/primeseq \
-with-tower \
-profile ku_sund_danhead,dancmpn02fl \
--genome GRCm39-2024-A \
--input samplesheet.csv \
--wells wells.csv \
--outdir results
```
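
The script can then be submitted from the cluster login node in the usual way (assuming the standard Slurm client tools are on the `PATH`):

```bash
sbatch align.sbatch      # submit the Nextflow head job
squeue -u "$USER"        # check that the job is queued/running
tail -f align.log        # follow pipeline progress (the --output file above)
```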

> [!WARNING]
> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
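
Alternatively, the same run can be driven entirely from a `-params-file`; a hypothetical `params.yaml` (file name and values are illustrative, mirroring the command above) might look like:

```bash
cat > params.yaml <<'EOF'
genome: GRCm39-2024-A
input: samplesheet.csv
wells: wells.csv
outdir: results
EOF

nextflow run brickmanlab/primeseq \
    -profile ku_sund_danhead \
    -params-file params.yaml
```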
22 changes: 9 additions & 13 deletions main.nf
@@ -19,21 +19,12 @@ include { PRIMESEQ } from './workflows/primeseq'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_primeseq_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_primeseq_pipeline'

include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_primeseq_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
GENOME PARAMETER VALUES
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

params.fasta = getGenomeAttribute('fasta')
params.gtf = getGenomeAttribute('gtf')
params.star_index = getGenomeAttribute('star')

ch_star_index = Channel.fromPath(params.star_index, checkIfExists: true).map{ it -> [ [id:'star_index'], it ] }.collect()
ch_wells = Channel.fromPath(params.wells, checkIfExists: true)

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NAMED WORKFLOWS FOR PIPELINE
@@ -47,6 +38,8 @@ workflow BRICKMANLAB_PRIMESEQ {

take:
samplesheet // channel: samplesheet read in from --input
wells
star_index

main:

@@ -55,8 +48,8 @@ workflow BRICKMANLAB_PRIMESEQ {
//
PRIMESEQ (
samplesheet,
ch_star_index,
ch_wells
wells,
star_index
)

emit:
@@ -83,14 +76,17 @@ workflow {
params.monochrome_logs,
args,
params.outdir,
params.input
params.input,
params.wells
)

//
// WORKFLOW: Run main workflow
//
BRICKMANLAB_PRIMESEQ (
PIPELINE_INITIALISATION.out.samplesheet
PIPELINE_INITIALISATION.out.samplesheet,
PIPELINE_INITIALISATION.out.wells,
PIPELINE_INITIALISATION.out.star_index
)

//
27 changes: 0 additions & 27 deletions nextflow_schema.json
@@ -72,33 +72,6 @@
"exists": true,
"description": "Base path to reference genomes",
"fa_icon": ""
},
"fasta": {
"type": "string",
"format": "file-path",
"exists": true,
"mimetype": "text/plain",
"pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
"description": "Path to FASTA genome file.",
"help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
"fa_icon": "far fa-file-code"
},
"gtf": {
"type": "string",
"format": "file-path",
"exists": true,
"mimetype": "text/plain",
"pattern": "^\\S+\\.gtf(\\.gz)?$",
"description": "Path to GTF annotation file.",
"fa_icon": "fas fa-code-branch",
"help_text": "This parameter is *mandatory* if `--genome` is not specified."
},
"star_index": {
"type": "string",
"format": "path",
"exists": true,
"fa_icon": "fas fa-bezier-curve",
"description": "Path to directory or tar.gz archive for pre-built STAR index."
}
}
},
22 changes: 16 additions & 6 deletions subworkflows/local/starsolo.nf
@@ -4,17 +4,27 @@ workflow STARSOLO {

take:
reads // channel: [ val(meta), [ fastq ] ]
index
index // channel [ val(meta), [starindex] ]
whitelist // file [whitelist]

main:

ch_versions = Channel.empty()

STARSOLO_ALIGN (
reads,
Channel.fromPath("$projectDir/assets/whitelist.tsv", checkIfExists: true),
index
)
ch_reads = reads.map {
meta, fastq -> [
[
id: meta.id,
plate_id: meta.plate_id,
umi_len: 16,
umi_start: 13,
cb_len: 12,
cb_start: 1,
], "CB_UMI_Simple", fastq
]
}

STARSOLO_ALIGN ( ch_reads, whitelist, index )

ch_versions = ch_versions.mix(STARSOLO_ALIGN.out.versions.first())

6 changes: 6 additions & 0 deletions subworkflows/local/utils_nfcore_primeseq_pipeline/main.nf
@@ -36,6 +36,7 @@ workflow PIPELINE_INITIALISATION {
nextflow_cli_args // array: List of positional nextflow CLI args
outdir // string: The output directory where the results will be saved
input // string: Path to input samplesheet
wells // string: Path to wells sheet

main:

@@ -100,9 +101,14 @@
}
.set { ch_samplesheet }

ch_wells = Channel.fromPath(wells, checkIfExists: true)
star_index = Channel.fromPath(file(getGenomeAttribute('star'), checkIfExists: true)).map { it -> [[id:it.Name], it] }.collect()

emit:
samplesheet = ch_samplesheet
versions = ch_versions
wells = ch_wells
star_index = star_index
}

/*
21 changes: 6 additions & 15 deletions workflows/primeseq.nf
@@ -12,6 +12,7 @@ include { paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_primeseq_pipeline'
include { getGenomeAttribute } from '../subworkflows/local/utils_nfcore_primeseq_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -23,14 +24,16 @@ workflow PRIMESEQ {

take:
ch_samplesheet // channel: samplesheet read in from --input
wells
ch_star_index
ch_wells

main:

ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()

whitelist = file("$projectDir/assets/whitelist.tsv", checkIfExists: true)

//
// MODULE: Run FastQC
//
@@ -43,26 +46,14 @@
//
// MODULE: Run STARSolo
//
ch_reads = ch_samplesheet.map {
meta, fastq -> [
[
id: meta.id,
plate_id: meta.plate_id,
umi_len: 16,
umi_start: 13,
cb_len: 12,
cb_start: 1,
], "CB_UMI_Simple", fastq
]
}
STARSOLO ( ch_reads, ch_star_index )
STARSOLO ( ch_samplesheet, ch_star_index, whitelist )
ch_versions = ch_versions.mix(STARSOLO.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc)

//
// MODULE: Merge wells and create count matrix
//
MAKE_COUNT_MATRIX ( STARSOLO.out.counts.combine(ch_wells) )
MAKE_COUNT_MATRIX ( STARSOLO.out.counts.combine(wells) )
ch_versions = ch_versions.mix(MAKE_COUNT_MATRIX.out.versions.first())

//
