nf-core · DongzeHE · Jan 20, 2025 · Jan 21, 2025 · Jan 22, 2025 · Jan 22, 2025
diff --git a/conf/modules.config b/conf/modules.config
@@ -41,6 +41,7 @@ process {
         }
         withName: 'ANNDATA_BARCODES' {
             ext.prefix = { "${meta.id}_${meta.input_type}_matrix" }
+            // ext.prefix = { "${meta.id}_filtered_matrix" }
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}/mtx_conversions/${meta.id}" },
                 mode: params.publish_dir_mode,
@@ -139,15 +140,16 @@ if (params.aligner == "alevin") {
                 mode: params.publish_dir_mode,
                 enabled: params.save_reference
             ]
-            ext.args = { "--rlen ${params.simpleaf_rlen}" }
+            // because piscem creates a large number of intermediate files,
+            // we set scratch to true to avoid IO issues
+            scratch = true
         }
         withName: 'SIMPLEAF_QUANT' {
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}/${meta.id}" },
                 mode: params.publish_dir_mode,
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
-            ext.args = "-r cr-like"
         }
         // Fix for issue 196
         // Modified for issue 334

diff --git a/core.1739377 b/core.1739377
diff --git a/docs/output.md b/docs/output.md
@@ -93,7 +93,7 @@ This pipeline uses the simplified and flexible modules in [Simpleaf](https://sim
 
 **Output directory: `results/reference_genome`**
 
-- `salmon_index`
+- `simpleaf_index`
   - Contains the indexed reference transcriptome for the Salmon mapper
 - `alevin/txp2gene.tsv`
   - The transcriptome to gene mapping TSV file utilized by Alevin-fry

diff --git a/main.nf b/main.nf
@@ -30,8 +30,8 @@ include { PIPELINE_COMPLETION     } from './subworkflows/local/utils_nfcore_scrn
 // Thus, manually provided files are not overwritten by the genome attributes
 params.fasta            = getGenomeAttribute('fasta')
 params.gtf              = getGenomeAttribute('gtf')
-params.salmon_index     = getGenomeAttribute('simpleaf')
-params.txp2gene         = getGenomeAttribute('simpleaf_tx2pgene')
+params.simpleaf_index     = getGenomeAttribute('simpleaf')
+params.txp2gene         = getGenomeAttribute('simpleaf_txp2gene')
 params.cellranger_index = params.aligner == 'cellrangerarc' ?
                             getGenomeAttribute('cellrangerarc') :
                             getGenomeAttribute('cellranger')

diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf
@@ -1,20 +1,22 @@
 process ALEVINQC {
 
     //
-    // This module executes alevinfry QC reporting tool on alevin results
+    // This module executes alevinfry QC reporting tool on alevin-fry results
     //
 
     tag "$meta.id"
     label 'process_low'
 
-    //The alevinqc 1.14.0 container is broken, missing some libraries - thus reverting this to previous 1.12.1 version
-    conda "bioconda::bioconductor-alevinqc=1.12.1"
+    conda "bioconda::bioconductor-alevinqc=1.18.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/bioconductor-alevinqc:1.12.1--r41h9f5acd7_0' :
-        'biocontainers/bioconductor-alevinqc:1.12.1--r41h9f5acd7_0' }"
+        'https://depot.galaxyproject.org/singularity/bioconductor-alevinqc:1.18.0--r43hf17093f_0' :
+        'biocontainers/bioconductor-alevinqc:1.18.0--r43hf17093f_0' }"
 
+    // all metas are the same
     input:
-    tuple val(meta), path(alevin_results)
+    tuple val(meta), path(quant_dir, stageAs: "quant_dir")
+    tuple val(meta1), path(permit_dir, stageAs: "permit_dir")
+    tuple val(meta2), path(map_dir)
 
     output:
     tuple val(meta), path("alevin_report_${meta.id}.html"), emit: report
@@ -29,9 +31,9 @@ process ALEVINQC {
     #!/usr/bin/env Rscript
     require(alevinQC)
     alevinFryQCReport(
-        mapDir = "${alevin_results}/af_map",
-        quantDir = "${alevin_results}/af_quant",
-        permitDir= "${alevin_results}/af_quant",
+        mapDir = "${map_dir}",
+        permitDir= "${permit_dir}",
+        quantDir = "${quant_dir}",
         sampleId = "${prefix}",
         outputFile = "alevin_report_${meta.id}.html",
         outputFormat = "html_document",

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
@@ -28,6 +28,8 @@ process MTX_TO_H5AD {
     script:
     def aligner = (input_aligner in [ 'cellranger', 'cellrangerarc', 'cellrangermulti' ]) ? 'cellranger' : input_aligner
 
+    aligner = input_aligner == "alevin" ? "simpleaf" : aligner
+
     template "mtx_to_h5ad_${aligner}.py"
 
     stub:

diff --git a/...les/local/templates/mtx_to_h5ad_alevin.py → ...s/local/templates/mtx_to_h5ad_simpleaf.py b/...les/local/templates/mtx_to_h5ad_alevin.py → ...s/local/templates/mtx_to_h5ad_simpleaf.py
@@ -85,7 +85,7 @@ def input_to_adata(
 
 # input_type comes from NF module
 input_to_adata(
-    input_data="${meta.id}_alevin_results/af_quant/alevin/",
+    input_data="${inputs}/alevin/",
     output="${meta.id}_${meta.input_type}_matrix.h5ad",
     sample="${meta.id}"
 )

diff --git a/nextflow.config b/nextflow.config
@@ -24,9 +24,9 @@ params {
     gtf               = null
 
     // alevin-fry parameters (simpleaf)
-    simpleaf_rlen     = 91
+    simpleaf_index      = null
     barcode_whitelist = null
-    salmon_index      = null
+    simpleaf_umi_resolution = "cr-like"
 
     // kallisto bustools parameters
     kallisto_index    = null

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -163,9 +163,9 @@
             "description": "",
             "default": "",
             "properties": {
-                "salmon_index": {
+                "simpleaf_index": {
                     "type": "string",
-                    "description": "This can be used to specify a precomputed Salmon index in the pipeline, in order to skip the generation of required indices by Salmon itself.",
+                    "description": "This can be used to specify a precomputed Simpleaf index, either generated by Salmon or Piscem, in the pipeline, in order to skip the generation of required indices by Simpleaf itself.",
                     "fa_icon": "fas fa-fish",
                     "format": "path",
                     "exists": true
@@ -177,10 +177,11 @@
                     "format": "file-path",
                     "exists": true
                 },
-                "simpleaf_rlen": {
-                    "type": "integer",
-                    "default": 91,
-                    "description": "It is the target read length the index will be built for, using simpleaf.",
+                "simpleaf_umi_resolution": {
+                    "type": "string",
+                    "default": "cr-like",
+                    "enum": ["cr-like", "cr-like-em", "parsimony", "parsimony-em", "parsimony-gene", "parsimony-gene-em"],
+                    "description": "UMI resolution strategy to deduplicate UMIs.",
                     "fa_icon": "fas fa-map-marked-alt"
                 }
             }

diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
diff --git a/subworkflows/local/simpleaf.nf b/subworkflows/local/simpleaf.nf
@@ -0,0 +1,98 @@
+/* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
+include { ALEVINQC              } from '../../modules/local/alevinqc'
+// nf-core modules are resolved relative to this file (subworkflows/local/),
+// i.e. two levels up to the pipeline root, like the local ALEVINQC include.
+include { SIMPLEAF_INDEX        } from '../../modules/nf-core/simpleaf/index'
+include { SIMPLEAF_QUANT        } from '../../modules/nf-core/simpleaf/quant'
+
+/*
+ * SCRNASEQ_SIMPLEAF
+ *
+ * Optionally builds a simpleaf index (SIMPLEAF_INDEX), runs quantification
+ * (SIMPLEAF_QUANT) and produces an alevinQC report (ALEVINQC).
+ *
+ * take:
+ *   ch_genome_fasta    channel with the genome FASTA (used when no transcript FASTA is given)
+ *   ch_genome_gtf      channel with the genome GTF (used when no transcript FASTA is given)
+ *   transcript_fasta   transcript FASTA, or a falsy value to build from genome FASTA + GTF
+ *   simpleaf_index     pre-built index, or a falsy value to build one
+ *   txp2gene           transcript-to-gene map, or a falsy value to use the one from SIMPLEAF_INDEX
+ *   barcode_whitelist  forwarded to SIMPLEAF_QUANT
+ *   chemistry          forwarded to SIMPLEAF_QUANT and stored in each sample's meta map
+ *   resolution         forwarded to SIMPLEAF_QUANT
+ *   ch_fastq           channel of [ meta, reads ]
+ *   map_dir            existing mapping directory, or a falsy value to map the reads
+ *
+ * emit:
+ *   ch_versions, txp2gene, index, map, quant, alevinqc
+ */
+workflow SCRNASEQ_SIMPLEAF {
+
+    take:
+    ch_genome_fasta // channel
+    ch_genome_gtf   // channel
+    transcript_fasta
+    simpleaf_index
+    txp2gene
+    barcode_whitelist
+    chemistry
+    resolution
+    ch_fastq   // channel
+    map_dir
+
+    main:
+    ch_versions = Channel.empty()
+
+    /*
+    * Build simpleaf index
+    */
+    // Only build an index when the user supplied neither a pre-built index
+    // nor an existing mapping directory. (Using `||` here would rebuild the
+    // index whenever map_dir is empty and silently discard a provided index.)
+    if ( !simpleaf_index && !map_dir ) {
+        // define input channels for index building
+        // we can either use the genome fasta and gtf files or the transcript fasta file
+        if ( transcript_fasta ) {
+            // empty placeholder for the unused genome fasta/gtf input
+            ch_genome_fasta_gtf = [ [:],[],[] ]
+            ch_transcript_fasta = Channel.of( [ [id: "${transcript_fasta.getBaseName()}"], transcript_fasta ] )
+        } else {
+            ch_genome_fasta_gtf = ch_genome_fasta.combine( ch_genome_gtf ).map{ fasta, gtf -> [[id: "${fasta.getBaseName()}"], fasta, gtf] }
+            // empty placeholder for the unused transcript fasta input
+            ch_transcript_fasta = Channel.of( [ [:], [] ] )
+        }
+
+        SIMPLEAF_INDEX(
+            ch_genome_fasta_gtf,
+            ch_transcript_fasta
+        )
+        // Channel of tuple(meta, index dir)
+        simpleaf_index = SIMPLEAF_INDEX.out.index.collect()
+        // Channel of t2g path or empty
+        t2g = SIMPLEAF_INDEX.out.t2g.collect()
+        ch_versions = ch_versions.mix(SIMPLEAF_INDEX.out.versions)
+
+        // ensure txp2gene is a Channel
+        if (!txp2gene) {
+            txp2gene = t2g
+        } else {
+            txp2gene = Channel.of( txp2gene )
+        }
+    } else {
+        // ensure simpleaf index and txp2gene are Channels
+        // NOTE(review): these are single-item queue channels; confirm that the
+        // combined index/t2g channel below is consumed once per sample as intended.
+        simpleaf_index = Channel.of( [ [:], simpleaf_index ] )
+        txp2gene = Channel.of( txp2gene )
+    }
+
+    // define input channels for quantification
+    // we can either use the mapping results or the reads and index files
+    if ( map_dir ) {
+        // reads and index are not needed when a mapping directory is given
+        ch_chemistry_reads = Channel.of( [ [:],[],[] ] )
+        ch_index_t2g = Channel.of( [ [:],[],[] ] )
+        ch_map_dir = Channel.of( [ [id: map_dir.baseName], map_dir ] )
+    } else {
+        // record the chemistry in the meta map and also pass it as its own tuple element
+        ch_chemistry_reads = ch_fastq.map{ meta, files -> tuple(meta + ["chemistry": chemistry], chemistry, files) }
+        ch_index_t2g = simpleaf_index.combine( txp2gene )
+        // empty placeholder for the unused mapping directory input
+        ch_map_dir = [ [:],[] ]
+    }
+
+    /*
+    * Perform quantification with simpleaf
+    */
+    SIMPLEAF_QUANT (
+        ch_chemistry_reads,
+        ch_index_t2g,
+        [[:], "unfiltered-pl", [], barcode_whitelist ],
+        resolution,
+        ch_map_dir
+    )
+    ch_versions = ch_versions.mix(SIMPLEAF_QUANT.out.versions)
+
+    // use the provided mapping directory if there is one, otherwise the fresh mapping output
+    ch_af_map = map_dir ? ch_map_dir : SIMPLEAF_QUANT.out.map
+    /*
+    * Run alevinQC
+    */
+    // the quant output is passed for both the quant and the permit directory inputs
+    ALEVINQC( SIMPLEAF_QUANT.out.quant, SIMPLEAF_QUANT.out.quant, ch_af_map )
+    ch_versions = ch_versions.mix(ALEVINQC.out.versions)
+
+
+    emit:
+    ch_versions
+    txp2gene
+    index       = simpleaf_index
+    map         = SIMPLEAF_QUANT.out.map
+    quant       = SIMPLEAF_QUANT.out.quant
+    alevinqc    = ALEVINQC.out.report
+}
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
@@ -11,7 +11,7 @@ include { methodsDescriptionText                            } from '../subworkfl
 include { getGenomeAttribute                                } from '../subworkflows/local/utils_nfcore_scrnaseq_pipeline'
 include { FASTQC_CHECK                                      } from '../subworkflows/local/fastqc'
 include { KALLISTO_BUSTOOLS                                 } from '../subworkflows/local/kallisto_bustools'
-include { SCRNASEQ_ALEVIN                                   } from '../subworkflows/local/alevin'
+include { SCRNASEQ_SIMPLEAF                                 } from '../subworkflows/local/simpleaf'
 include { STARSOLO                                          } from '../subworkflows/local/starsolo'
 include { CELLRANGER_ALIGN                                  } from "../subworkflows/local/align_cellranger"
 include { CELLRANGER_MULTI_ALIGN                            } from "../subworkflows/local/align_cellrangermulti"
@@ -64,7 +64,7 @@ workflow SCRNASEQ {
     kb_t2c            = params.kb_t2c         ? file(params.kb_t2c, checkIfExists: true) : []
 
     //salmon params
-    ch_salmon_index   = params.salmon_index ? file(params.salmon_index, checkIfExists: true) : []
+    ch_simpleaf_index   = params.simpleaf_index ? file(params.simpleaf_index, checkIfExists: true) : []
 
     //star params
     star_index        = params.star_index ? file(params.star_index, checkIfExists: true) : null
@@ -135,19 +135,32 @@ workflow SCRNASEQ {
 
     // Run salmon alevin pipeline
     if (params.aligner == "alevin") {
-        SCRNASEQ_ALEVIN(
+
+        SCRNASEQ_SIMPLEAF(
             ch_genome_fasta,
             ch_filter_gtf,
             ch_transcript_fasta,
-            ch_salmon_index,
+            ch_simpleaf_index,
             ch_txp2gene,
             ch_barcode_whitelist,
             protocol_config['protocol'],
-            ch_fastq
+            params.simpleaf_umi_resolution,
+            ch_fastq,
+            [] // for existing map dir; not applicable
         )
-        ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions)
-        ch_multiqc_files = ch_multiqc_files.mix(SCRNASEQ_ALEVIN.out.alevin_results.map{ meta, it -> it })
-        ch_mtx_matrices = ch_mtx_matrices.mix( SCRNASEQ_ALEVIN.out.alevin_results )
+        ch_versions = ch_versions.mix(SCRNASEQ_SIMPLEAF.out.ch_versions)
+        ch_multiqc_files = ch_multiqc_files.mix(SCRNASEQ_SIMPLEAF.out.quant.map{ meta, it -> it })
+        ch_mtx_matrices = ch_mtx_matrices.mix(
+            SCRNASEQ_SIMPLEAF.out.quant.map{
+                meta, files -> [
+                    meta +
+                    [input_type: meta["filtered"] ? "filtered" : "raw" ],
+                    files
+                ]
+            }
+        )
+
+        ch_txp2gene = SCRNASEQ_SIMPLEAF.out.txp2gene
     }
 
     // Run STARSolo pipeline
@@ -284,7 +297,9 @@ workflow SCRNASEQ {
     if ( !params.skip_cellbender && !(params.aligner in ['cellrangerarc']) ) {
         // module should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it
         H5AD_REMOVEBACKGROUND_BARCODES_CELLBENDER_ANNDATA (
-            ch_h5ads.filter { meta, mtx_files -> meta.input_type == 'raw' }
+            ch_h5ads
+                .filter { meta, mtx_files -> meta.input_type == 'raw' }
+                .map { meta, mtx_files -> [ meta + [input_type: 'filtered'], mtx_files ]} // to avoid name collision
         )
         ch_h5ads = ch_h5ads.mix(
             H5AD_REMOVEBACKGROUND_BARCODES_CELLBENDER_ANNDATA.out.h5ad
-Original file line number
+Diff line change
@@ Expand Up / @@ -28,6 +28,8 @@ process MTX_TO_H5AD { @@
         script:
         def aligner = (input_aligner in [ 'cellranger', 'cellrangerarc', 'cellrangermulti' ]) ? 'cellranger' : input_aligner
+        aligner = input_aligner == "alevin" ? "simpleaf" : aligner
         template "mtx_to_h5ad_${aligner}.py"
         stub:
@@ Expand Down @@