fix: pipeline was not running starsolo in parallel
matq007 committed Nov 3, 2024
1 parent c2c3848 commit 4176307
Showing 6 changed files with 62 additions and 61 deletions.
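
In outline: the STARsolo whitelist was previously created inside the `STARSOLO` subworkflow as a single-element queue channel (`Channel.fromPath(...)`), which is consumed after the first task and so limits `STARSOLO_ALIGN` to a single run; this commit passes the whitelist as a plain `file(...)` value instead, moves the read/meta mapping into the subworkflow, and threads the wells sheet and STAR index through the workflow `take:` inputs rather than top-level channels. A minimal, self-contained sketch of the queue-channel vs. value pitfall (illustrative only, assumed file names, not code from this repository):

```nextflow
// Illustrative sketch. A single-element queue channel, e.g.
// Channel.fromPath('whitelist.tsv'), would be consumed after the first task
// and ALIGN would run only once; passing a value (the result of file(...))
// lets ALIGN run once per sample, in parallel.
nextflow.enable.dsl = 2

process ALIGN {
    input:
    tuple val(meta), path(reads)
    path whitelist

    output:
    tuple val(meta), path("${meta.id}.out")

    script:
    """
    cat ${whitelist} ${reads} > ${meta.id}.out
    """
}

workflow {
    samples = Channel.of(
        [ [id: 'sample_a'], file('a.fastq') ],
        [ [id: 'sample_b'], file('b.fastq') ]
    )
    whitelist = file('whitelist.tsv')   // value input, reused for every sample
    ALIGN ( samples, whitelist )        // two parallel tasks, one per sample
}
```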
25 changes: 25 additions & 0 deletions README.md
@@ -56,6 +56,31 @@ nextflow run brickmanlab/primeseq \
--outdir output
```

or by creating a Slurm batch script (`align.sbatch`):

```bash
#!/bin/bash

#SBATCH --job-name=align
#SBATCH --mail-type=END,FAIL
#SBATCH --mail-user=NONE
#SBATCH -c 1
#SBATCH --mem=2gb
#SBATCH --time=1-00:00:00
#SBATCH --output=align.log
#SBATCH -w dancmpn02fl

module load openjdk/20.0.0 nextflow/23.04.1.5866 singularity/3.8.0

nextflow run /home/fdb589/primeseq \
-with-tower \
-profile ku_sund_danhead,dancmpn02fl \
--genome GRCm39-2024-A \
--input samplesheet.csv \
--wells wells.csv \
--outdir results
```
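
The script can then be submitted from the cluster login node in the usual way (assuming the standard Slurm client tools are on the `PATH`):

```bash
sbatch align.sbatch      # submit the Nextflow head job
squeue -u "$USER"        # check that the job is queued/running
tail -f align.log        # follow pipeline progress (the --output file above)
```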

> [!WARNING]
> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
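
Alternatively, the same run can be driven entirely from a `-params-file`; a hypothetical `params.yaml` (file name and values are illustrative, mirroring the command above) might look like:

```bash
cat > params.yaml <<'EOF'
genome: GRCm39-2024-A
input: samplesheet.csv
wells: wells.csv
outdir: results
EOF

nextflow run brickmanlab/primeseq \
    -profile ku_sund_danhead \
    -params-file params.yaml
```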
22 changes: 9 additions & 13 deletions main.nf
@@ -19,21 +19,12 @@ include { PRIMESEQ } from './workflows/primeseq'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_primeseq_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_primeseq_pipeline'

include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_primeseq_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
GENOME PARAMETER VALUES
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

params.fasta = getGenomeAttribute('fasta')
params.gtf = getGenomeAttribute('gtf')
params.star_index = getGenomeAttribute('star')

ch_star_index = Channel.fromPath(params.star_index, checkIfExists: true).map{ it -> [ [id:'star_index'], it ] }.collect()
ch_wells = Channel.fromPath(params.wells, checkIfExists: true)

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NAMED WORKFLOWS FOR PIPELINE
@@ -47,6 +38,8 @@ workflow BRICKMANLAB_PRIMESEQ {

take:
samplesheet // channel: samplesheet read in from --input
wells
star_index

main:

@@ -55,8 +48,8 @@ workflow BRICKMANLAB_PRIMESEQ {
//
PRIMESEQ (
samplesheet,
ch_star_index,
ch_wells
wells,
star_index
)

emit:
@@ -83,14 +76,17 @@ workflow {
params.monochrome_logs,
args,
params.outdir,
params.input
params.input,
params.wells
)

//
// WORKFLOW: Run main workflow
//
BRICKMANLAB_PRIMESEQ (
PIPELINE_INITIALISATION.out.samplesheet
PIPELINE_INITIALISATION.out.samplesheet,
PIPELINE_INITIALISATION.out.wells,
PIPELINE_INITIALISATION.out.star_index
)

//
27 changes: 0 additions & 27 deletions nextflow_schema.json
@@ -72,33 +72,6 @@
"exists": true,
"description": "Base path to reference genomes",
"fa_icon": ""
},
"fasta": {
"type": "string",
"format": "file-path",
"exists": true,
"mimetype": "text/plain",
"pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
"description": "Path to FASTA genome file.",
"help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
"fa_icon": "far fa-file-code"
},
"gtf": {
"type": "string",
"format": "file-path",
"exists": true,
"mimetype": "text/plain",
"pattern": "^\\S+\\.gtf(\\.gz)?$",
"description": "Path to GTF annotation file.",
"fa_icon": "fas fa-code-branch",
"help_text": "This parameter is *mandatory* if `--genome` is not specified."
},
"star_index": {
"type": "string",
"format": "path",
"exists": true,
"fa_icon": "fas fa-bezier-curve",
"description": "Path to directory or tar.gz archive for pre-built STAR index."
}
}
},
22 changes: 16 additions & 6 deletions subworkflows/local/starsolo.nf
@@ -4,17 +4,27 @@ workflow STARSOLO {

take:
reads // channel: [ val(meta), [ fastq ] ]
index
index // channel [ val(meta), [starindex] ]
whitelist // file [whitelist]

main:

ch_versions = Channel.empty()

STARSOLO_ALIGN (
reads,
Channel.fromPath("$projectDir/assets/whitelist.tsv", checkIfExists: true),
index
)
ch_reads = reads.map {
meta, fastq -> [
[
id: meta.id,
plate_id: meta.plate_id,
umi_len: 16,
umi_start: 13,
cb_len: 12,
cb_start: 1,
], "CB_UMI_Simple", fastq
]
}

STARSOLO_ALIGN ( ch_reads, whitelist, index )

ch_versions = ch_versions.mix(STARSOLO_ALIGN.out.versions.first())

6 changes: 6 additions & 0 deletions subworkflows/local/utils_nfcore_primeseq_pipeline/main.nf
@@ -36,6 +36,7 @@ workflow PIPELINE_INITIALISATION {
nextflow_cli_args // array: List of positional nextflow CLI args
outdir // string: The output directory where the results will be saved
input // string: Path to input samplesheet
wells // string: Path to wells sheet

main:

@@ -100,9 +101,14 @@
}
.set { ch_samplesheet }

ch_wells = Channel.fromPath(wells, checkIfExists: true)
star_index = Channel.fromPath(file(getGenomeAttribute('star'), checkIfExists: true)).map { it -> [[id:it.Name], it] }.collect()

emit:
samplesheet = ch_samplesheet
versions = ch_versions
wells = ch_wells
star_index = star_index
}

/*
21 changes: 6 additions & 15 deletions workflows/primeseq.nf
@@ -12,6 +12,7 @@ include { paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_primeseq_pipeline'
include { getGenomeAttribute } from '../subworkflows/local/utils_nfcore_primeseq_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -23,14 +24,16 @@ workflow PRIMESEQ {

take:
ch_samplesheet // channel: samplesheet read in from --input
wells
ch_star_index
ch_wells

main:

ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()

whitelist = file("$projectDir/assets/whitelist.tsv", checkIfExists: true)

//
// MODULE: Run FastQC
//
@@ -43,26 +46,14 @@
//
// MODULE: Run STARSolo
//
ch_reads = ch_samplesheet.map {
meta, fastq -> [
[
id: meta.id,
plate_id: meta.plate_id,
umi_len: 16,
umi_start: 13,
cb_len: 12,
cb_start: 1,
], "CB_UMI_Simple", fastq
]
}
STARSOLO ( ch_reads, ch_star_index )
STARSOLO ( ch_samplesheet, ch_star_index, whitelist )
ch_versions = ch_versions.mix(STARSOLO.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc)

//
// MODULE: Merge wells and create count matrix
//
MAKE_COUNT_MATRIX ( STARSOLO.out.counts.combine(ch_wells) )
MAKE_COUNT_MATRIX ( STARSOLO.out.counts.combine(wells) )
ch_versions = ch_versions.mix(MAKE_COUNT_MATRIX.out.versions.first())

//
