diff --git a/modules/nf-core/umitools/group/main.nf b/modules/nf-core/umitools/group/main.nf
new file mode 100644
index 00000000000..9a6370b7954
--- /dev/null
+++ b/modules/nf-core/umitools/group/main.nf
@@ -0,0 +1,67 @@
+// Groups reads by UMI and mapping coordinates using `umi_tools group`.
+// create_bam / get_group_info toggle the optional tagged-BAM and TSV outputs.
+process UMITOOLS_GROUP {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::umi_tools=1.1.4"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
+        'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+    val create_bam
+    val get_group_info
+
+    output:
+    tuple val(meta), path("*.log")        , emit: log
+    tuple val(meta), path("${prefix}.bam"), optional: true, emit: bam
+    tuple val(meta), path("*.tsv")        , optional: true, emit: tsv
+    path  "versions.yml"                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // prefix is deliberately left without 'def': the output block references "${prefix}.bam"
+    prefix = task.ext.prefix ?: "${meta.id}"
+    // a missing or false meta.single_end selects --paired
+    def paired = meta.single_end ? "" : "--paired"
+    def output_bam = create_bam ? "--output-bam -S ${prefix}.bam" : ""
+    def group_info = get_group_info ? "--group-out ${prefix}.tsv" : ""
+
+    if (create_bam && "$bam" == "${prefix}.bam") { error "Input and output names are the same, set prefix in module configuration to disambiguate!" }
+
+    // add a fixed --random-seed only when ext.args does not already carry one
+    if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"}
+    """
+    PYTHONHASHSEED=0 umi_tools \\
+        group \\
+        -I $bam \\
+        $output_bam \\
+        -L ${prefix}.log \\
+        $group_info \\
+        $paired \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.bam
+    touch ${prefix}.log
+    touch ${prefix}.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/umitools/group/meta.yml b/modules/nf-core/umitools/group/meta.yml
new file mode 100644
index 00000000000..1fa826dded6
--- /dev/null
+++ b/modules/nf-core/umitools/group/meta.yml
@@ -0,0 +1,66 @@
+name: umitools_group
+description: Group reads based on their UMI and mapping coordinates
+keywords:
+  - umitools
+  - umi
+  - deduplication
+  - dedup
+  - clustering
+tools:
+  - umi_tools:
+      description: >
+        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
+        and single cell RNA-Seq cell barcodes
+      documentation: https://umi-tools.readthedocs.io/en/latest/
+      license: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: |
+        BAM file containing reads to be grouped via UMIs.
+      pattern: "*.{bam}"
+  - bai:
+      type: file
+      description: |
+        BAM index files corresponding to the input BAM file.
+      pattern: "*.{bai}"
+  - create_bam:
+      type: boolean
+      description: |
+        Whether or not to create a read group tagged BAM file.
+  - get_group_info:
+      type: boolean
+      description: |
+        Whether or not to generate the flatfile describing the read groups, see docs for complete info of all columns
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: a read group tagged BAM file.
+      pattern: "*.{bam}"
+  - log:
+      type: file
+      description: File with logging information
+      pattern: "*.{log}"
+  - tsv:
+      type: file
+      description: Flatfile describing the read groups, see docs for complete info of all columns
+      pattern: "*.{tsv}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@Joon-Klaps"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 3aeccf279b5..b3866fffd42 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -4009,6 +4009,10 @@ umitools/extract:
   - modules/nf-core/umitools/extract/**
   - tests/modules/nf-core/umitools/extract/**
 
+umitools/group:
+  - modules/nf-core/umitools/group/**
+  - tests/modules/nf-core/umitools/group/**
+
 unicycler:
   - modules/nf-core/unicycler/**
   - tests/modules/nf-core/unicycler/**
diff --git a/tests/modules/nf-core/umitools/group/main.nf b/tests/modules/nf-core/umitools/group/main.nf
new file mode 100644
index 00000000000..0540b906187
--- /dev/null
+++ b/tests/modules/nf-core/umitools/group/main.nf
@@ -0,0 +1,96 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { UMITOOLS_EXTRACT } from '../../../../../modules/nf-core/umitools/extract/main.nf'
+include { BWA_INDEX        } from '../../../../../modules/nf-core/bwa/index/main.nf'
+include { BWA_MEM          } from '../../../../../modules/nf-core/bwa/mem/main.nf'
+include { SAMTOOLS_INDEX   } from '../../../../../modules/nf-core/samtools/index/main.nf'
+include { UMITOOLS_GROUP   } from '../../../../../modules/nf-core/umitools/group/main.nf'
+
+//
+// Test with no UMI
+//
+workflow test_umitools_group_no_umi {
+    input = [
+        [ id:'test'], // meta map
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
+    ]
+    create_bam = true
+    get_group_info = true
+
+    UMITOOLS_GROUP ( input, create_bam, get_group_info )
+}
+
+//
+// Test with single-end data with BAM false and group info true
+//
+workflow test_umitools_group_single_end_info {
+    input = [
+        [ id:'test', single_end:true ], // meta map
+        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+    ]
+    fasta = [
+        [ id:'sarscov2'],
+        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    ]
+    create_bam = false
+    get_group_info = true
+
+    UMITOOLS_EXTRACT ( input )
+    BWA_INDEX ( fasta )
+    BWA_MEM ( UMITOOLS_EXTRACT.out.reads, BWA_INDEX.out.index, true )
+    SAMTOOLS_INDEX ( BWA_MEM.out.bam )
+    UMITOOLS_GROUP ( BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]), create_bam, get_group_info )
+}
+
+//
+// Test with paired-end data with BAM true and group info false
+//
+workflow test_umitools_group_paired_end_bam {
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+        ]
+    ]
+    fasta = [
+        [ id:'sarscov2'],
+        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    ]
+    create_bam = true
+    get_group_info = false
+
+    UMITOOLS_EXTRACT ( input )
+    BWA_INDEX ( fasta )
+    BWA_MEM ( UMITOOLS_EXTRACT.out.reads, BWA_INDEX.out.index, true )
+    SAMTOOLS_INDEX ( BWA_MEM.out.bam )
+    UMITOOLS_GROUP ( BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]), create_bam, get_group_info )
+}
+
+//
+// Test with paired-end data BAM true and group info true
+//
+workflow test_umitools_group_paired_bam_info {
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+        ]
+    ]
+    fasta = [
+        [ id:'sarscov2'],
+        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    ]
+    create_bam = true
+    get_group_info = true
+
+    UMITOOLS_EXTRACT ( input )
+    BWA_INDEX ( fasta )
+    BWA_MEM ( UMITOOLS_EXTRACT.out.reads, BWA_INDEX.out.index, true )
+    SAMTOOLS_INDEX ( BWA_MEM.out.bam )
+    UMITOOLS_GROUP ( BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]), create_bam, get_group_info )
+}
diff --git a/tests/modules/nf-core/umitools/group/nextflow.config b/tests/modules/nf-core/umitools/group/nextflow.config
new file mode 100644
index 00000000000..93d50ad5d6a
--- /dev/null
+++ b/tests/modules/nf-core/umitools/group/nextflow.config
@@ -0,0 +1,27 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+
+    withName: BWA_INDEX {
+        publishDir = [enabled : false ]
+    }
+
+    withName: BWA_MEM {
+        publishDir = [enabled : false ]
+    }
+
+    withName: SAMTOOLS_INDEX {
+        publishDir = [enabled : false ]
+    }
+
+    withName: UMITOOLS_EXTRACT {
+        ext.args = '--bc-pattern="NNNN"'
+        publishDir = [ enabled : false ]
+    }
+
+    withName: UMITOOLS_GROUP {
+        ext.prefix = { "${meta.id}.group" }
+    }
+
+}
diff --git a/tests/modules/nf-core/umitools/group/test.yml b/tests/modules/nf-core/umitools/group/test.yml
new file mode 100644
index 00000000000..ba81292e0fb
--- /dev/null
+++ b/tests/modules/nf-core/umitools/group/test.yml
@@ -0,0 +1,51 @@
+- name: umitools group test_umitools_group_no_umi
+  command: nextflow run ./tests/modules/nf-core/umitools/group -entry test_umitools_group_no_umi -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/umitools/group/nextflow.config
+  tags:
+    - umitools/group
+    - umitools
+  files:
+    - path: output/umitools/test.group.bam
+      md5sum: fb90b49a90c2b3e8ddfedd9c95361625
+    - path: output/umitools/test.group.log
+      contains: ["# UMI-tools version:"]
+    - path: output/umitools/test.group.tsv
+      md5sum: 1a3ccdc00df59fa89c79ad482980a003
+    - path: output/umitools/versions.yml
+
+- name: umitools group test_umitools_group_single_end_info
+  command: nextflow run ./tests/modules/nf-core/umitools/group -entry test_umitools_group_single_end_info -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/umitools/group/nextflow.config
+  tags:
+    - umitools/group
+    - umitools
+  files:
+    - path: output/umitools/test.group.log
+      contains: ["# UMI-tools version:"]
+    - path: output/umitools/test.group.tsv
+      md5sum: e7d46166eb3d8f42d73032e44f313b71
+    - path: output/umitools/versions.yml
+
+- name: umitools group test_umitools_group_paired_end_bam
+  command: nextflow run ./tests/modules/nf-core/umitools/group -entry test_umitools_group_paired_end_bam -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/umitools/group/nextflow.config
+  tags:
+    - umitools/group
+    - umitools
+  files:
+    - path: output/umitools/test.group.bam
+      md5sum: 8f9250416a7ccbe970f5034826c318f5
+    - path: output/umitools/test.group.log
+      contains: ["# UMI-tools version:"]
+    - path: output/umitools/versions.yml
+
+- name: umitools group test_umitools_group_paired_bam_info
+  command: nextflow run ./tests/modules/nf-core/umitools/group -entry test_umitools_group_paired_bam_info -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/umitools/group/nextflow.config
+  tags:
+    - umitools/group
+    - umitools
+  files:
+    - path: output/umitools/test.group.bam
+      md5sum: 8f9250416a7ccbe970f5034826c318f5
+    - path: output/umitools/test.group.log
+      contains: ["# UMI-tools version:"]
+    - path: output/umitools/test.group.tsv
+      md5sum: d652eb6570057e9e709e8cac5f43d00c
+    - path: output/umitools/versions.yml