Skip to content

Commit

Permalink
New module: umitools/group (#3765)
Browse files Browse the repository at this point in the history
* init umitools/group

* remove TODOs from umitools/group

* fix prettier

* remove redundant variables from stub
  • Loading branch information
Joon-Klaps authored Aug 29, 2023
1 parent f05fa7c commit 90038ef
Show file tree
Hide file tree
Showing 6 changed files with 302 additions and 0 deletions.
62 changes: 62 additions & 0 deletions modules/nf-core/umitools/group/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
process UMITOOLS_GROUP {
    // Runs `umi_tools group` on an indexed BAM file.
    //
    // Inputs:
    //   meta            - sample map; `meta.id` is the default output prefix and a
    //                     falsy/missing `meta.single_end` adds `--paired`
    //   bam, bai        - BAM file and its index
    //   create_bam      - when true, write `${prefix}.bam` (--output-bam -S)
    //   get_group_info  - when true, write `${prefix}.tsv` (--group-out)
    //
    // Outputs: the log (always), the optional BAM / TSV, and versions.yml.
    tag "$meta.id"
    label 'process_medium'

    conda "bioconda::umi_tools=1.1.4"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
        'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"

    input:
    tuple val(meta), path(bam), path(bai)
    val create_bam
    val get_group_info

    output:
    tuple val(meta), path("*.log")        , emit: log
    tuple val(meta), path("${prefix}.bam"), optional: true, emit: bam
    tuple val(meta), path("*.tsv")        , optional: true, emit: tsv
    path  "versions.yml"                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` is deliberately assigned without `def`: the output block above
    // reads it, so it must live in the task context rather than be a local.
    prefix = task.ext.prefix ?: "${meta.id}"
    def paired     = meta.single_end ? "" : "--paired"
    def output_bam = create_bam      ? "--output-bam -S ${prefix}.bam" : ""
    def group_info = get_group_info  ? "--group-out ${prefix}.tsv"     : ""

    // Refuse to run when the requested output BAM would have the same name as the staged input.
    if (create_bam && "$bam" == "${prefix}.bam") { error "Input and output names are the same, set prefix in module configuration to disambiguate!" }

    // Supply a fixed default seed unless the caller already passed --random-seed.
    if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"}
    """
    PYTHONHASHSEED=0 umi_tools \\
        group \\
        -I $bam \\
        $output_bam \\
        -L ${prefix}.log \\
        $group_info \\
        $paired \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
    END_VERSIONS
    """

    stub:
    prefix = task.ext.prefix ?: "${meta.id}"
    // Create only the optional outputs a real run with these flags would produce.
    def touch_bam = create_bam     ? "touch ${prefix}.bam" : ""
    def touch_tsv = get_group_info ? "touch ${prefix}.tsv" : ""
    """
    $touch_bam
    touch ${prefix}.log
    $touch_tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
    END_VERSIONS
    """
}
62 changes: 62 additions & 0 deletions modules/nf-core/umitools/group/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
name: umitools_group
description: Group reads based on their UMI and mapping coordinates
keywords:
  - umitools
  - umi
  - deduplication
  - dedup
  - clustering
tools:
  - umi_tools:
      description: >
        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
        and single cell RNA-Seq cell barcodes
      documentation: https://umi-tools.readthedocs.io/en/latest/
      license: ["MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: |
        BAM file containing reads to be grouped via UMIs.
      pattern: "*.{bam}"
  - bai:
      type: file
      description: |
        BAM index files corresponding to the input BAM file.
      pattern: "*.{bai}"
  - create_bam:
      type: boolean
      description: |
        Whether or not to create a read group tagged BAM file.
  - get_group_info:
      type: boolean
      description: |
        Whether or not to generate the flatfile describing the read groups, see docs for complete info of all columns
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  # Optional: only produced when `create_bam` is true.
  - bam:
      type: file
      description: a read group tagged BAM file.
      pattern: "*.{bam}"
  - log:
      type: file
      description: File with logging information
      pattern: "*.{log}"
  # Optional: only produced when `get_group_info` is true.
  - tsv:
      type: file
      description: Flatfile describing the read groups, see docs for complete info of all columns
      pattern: "*.{tsv}"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@Joon-Klaps"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4029,6 +4029,10 @@ umitools/extract:
- modules/nf-core/umitools/extract/**
- tests/modules/nf-core/umitools/extract/**

# Path globs registered under the umitools/group tag.
# NOTE(review): presumably used to select which module tests run when these paths change - confirm against the CI config.
umitools/group:
  - modules/nf-core/umitools/group/**
  - tests/modules/nf-core/umitools/group/**

unicycler:
- modules/nf-core/unicycler/**
- tests/modules/nf-core/unicycler/**
Expand Down
96 changes: 96 additions & 0 deletions tests/modules/nf-core/umitools/group/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { UMITOOLS_EXTRACT } from '../../../../../modules/nf-core/umitools/extract/main.nf'
include { BWA_INDEX } from '../../../../../modules/nf-core/bwa/index/main.nf'
include { BWA_MEM } from '../../../../../modules/nf-core/bwa/mem/main.nf'
include { SAMTOOLS_INDEX } from '../../../../../modules/nf-core/samtools/index/main.nf'
include { UMITOOLS_GROUP } from '../../../../../modules/nf-core/umitools/group/main.nf'

//
// Test with no UMI
//
workflow test_umitools_group_no_umi {
    // Feed a ready-made sorted BAM and its index straight from the test datasets
    // into UMITOOLS_GROUP, requesting both the tagged BAM and the group table.
    def illumina = params.test_data['sarscov2']['illumina']

    def bam_with_index = [
        [ id:'test' ], // meta map
        file(illumina['test_paired_end_sorted_bam'],     checkIfExists: true),
        file(illumina['test_paired_end_sorted_bam_bai'], checkIfExists: true)
    ]

    UMITOOLS_GROUP ( bam_with_index, true, true )
}

//
// Test with single-end data with BAM false and group info true
//
workflow test_umitools_group_single_end_info {
    // Single-end reads: extract UMIs, align with BWA, index, then group.
    // Only the group table is requested (no output BAM).
    def sarscov2 = params.test_data['sarscov2']

    def reads = [
        [ id:'test', single_end:true ], // meta map
        file(sarscov2['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]
    def reference = [
        [ id:'sarscov2' ],
        file(sarscov2['genome']['genome_fasta'], checkIfExists: true)
    ]

    BWA_INDEX        ( reference )
    UMITOOLS_EXTRACT ( reads )
    BWA_MEM          ( UMITOOLS_EXTRACT.out.reads, BWA_INDEX.out.index, true )
    SAMTOOLS_INDEX   ( BWA_MEM.out.bam )

    // Pair each BAM with its index on the meta map (first tuple element).
    def ch_bam_bai = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])

    UMITOOLS_GROUP ( ch_bam_bai, false, true )
}

//
// Test with paired-end data with BAM true and group info false
//
workflow test_umitools_group_paired_end_bam {
    // Paired-end reads: extract UMIs, align with BWA, index, then group.
    // Only the tagged BAM is requested (no group table).
    def sarscov2 = params.test_data['sarscov2']

    def read_pair = [
        file(sarscov2['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(sarscov2['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]
    def reads = [
        [ id:'test', single_end:false ], // meta map
        read_pair
    ]
    def reference = [
        [ id:'sarscov2' ],
        file(sarscov2['genome']['genome_fasta'], checkIfExists: true)
    ]

    BWA_INDEX        ( reference )
    UMITOOLS_EXTRACT ( reads )
    BWA_MEM          ( UMITOOLS_EXTRACT.out.reads, BWA_INDEX.out.index, true )
    SAMTOOLS_INDEX   ( BWA_MEM.out.bam )

    // Pair each BAM with its index on the meta map (first tuple element).
    def ch_bam_bai = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])

    UMITOOLS_GROUP ( ch_bam_bai, true, false )
}

//
// Test with paired-end data BAM true and group info true
//
workflow test_umitools_group_paired_bam_info {
    // Paired-end reads: extract UMIs, align with BWA, index, then group.
    // Both the tagged BAM and the group table are requested.
    def sarscov2 = params.test_data['sarscov2']

    def read_pair = [
        file(sarscov2['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(sarscov2['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]
    def reads = [
        [ id:'test', single_end:false ], // meta map
        read_pair
    ]
    def reference = [
        [ id:'sarscov2' ],
        file(sarscov2['genome']['genome_fasta'], checkIfExists: true)
    ]

    BWA_INDEX        ( reference )
    UMITOOLS_EXTRACT ( reads )
    BWA_MEM          ( UMITOOLS_EXTRACT.out.reads, BWA_INDEX.out.index, true )
    SAMTOOLS_INDEX   ( BWA_MEM.out.bam )

    // Pair each BAM with its index on the meta map (first tuple element).
    def ch_bam_bai = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])

    UMITOOLS_GROUP ( ch_bam_bai, true, true )
}
27 changes: 27 additions & 0 deletions tests/modules/nf-core/umitools/group/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
process {

    // Default publish location: <outdir>/<first '_'-separated token of the
    // unqualified process name, lower-cased>.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // Module under test: give outputs a distinct prefix.
    withName: 'UMITOOLS_GROUP' {
        ext.prefix = { "${meta.id}.group" }
    }

    // Upstream helper steps: outputs are not published.
    withName: 'UMITOOLS_EXTRACT' {
        ext.args   = '--bc-pattern="NNNN"'
        publishDir = [ enabled: false ]
    }

    withName: 'BWA_INDEX' {
        publishDir = [ enabled: false ]
    }

    withName: 'BWA_MEM' {
        publishDir = [ enabled: false ]
    }

    withName: 'SAMTOOLS_INDEX' {
        publishDir = [ enabled: false ]
    }

}
51 changes: 51 additions & 0 deletions tests/modules/nf-core/umitools/group/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Pre-built BAM input; BAM and group table both requested.
- name: umitools group test_umitools_group_no_umi
  command: >-
    nextflow run ./tests/modules/nf-core/umitools/group
    -entry test_umitools_group_no_umi
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/umitools/group/nextflow.config
  tags:
    - umitools/group
    - umitools
  files:
    - path: output/umitools/test.group.bam
      md5sum: "fb90b49a90c2b3e8ddfedd9c95361625"
    - path: output/umitools/test.group.log
      contains:
        - "# UMI-tools version:"
    - path: output/umitools/test.group.tsv
      md5sum: "1a3ccdc00df59fa89c79ad482980a003"
    # No content check configured for versions.yml.
    - path: output/umitools/versions.yml

# Single-end input; group table only, so no BAM is listed.
- name: umitools group test_umitools_group_single_end_info
  command: >-
    nextflow run ./tests/modules/nf-core/umitools/group
    -entry test_umitools_group_single_end_info
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/umitools/group/nextflow.config
  tags:
    - umitools/group
    - umitools
  files:
    - path: output/umitools/test.group.log
      contains:
        - "# UMI-tools version:"
    - path: output/umitools/test.group.tsv
      md5sum: "e7d46166eb3d8f42d73032e44f313b71"
    # No content check configured for versions.yml.
    - path: output/umitools/versions.yml

# Paired-end input; BAM only, so no group table is listed.
- name: umitools group test_umitools_group_paired_end_bam
  command: >-
    nextflow run ./tests/modules/nf-core/umitools/group
    -entry test_umitools_group_paired_end_bam
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/umitools/group/nextflow.config
  tags:
    - umitools/group
    - umitools
  files:
    - path: output/umitools/test.group.bam
      md5sum: "8f9250416a7ccbe970f5034826c318f5"
    - path: output/umitools/test.group.log
      contains:
        - "# UMI-tools version:"
    # No content check configured for versions.yml.
    - path: output/umitools/versions.yml

# Paired-end input; BAM and group table both requested.
- name: umitools group test_umitools_group_paired_bam_info
  command: >-
    nextflow run ./tests/modules/nf-core/umitools/group
    -entry test_umitools_group_paired_bam_info
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/umitools/group/nextflow.config
  tags:
    - umitools/group
    - umitools
  files:
    - path: output/umitools/test.group.bam
      md5sum: "8f9250416a7ccbe970f5034826c318f5"
    - path: output/umitools/test.group.log
      contains:
        - "# UMI-tools version:"
    - path: output/umitools/test.group.tsv
      md5sum: "d652eb6570057e9e709e8cac5f43d00c"
    # No content check configured for versions.yml.
    - path: output/umitools/versions.yml

0 comments on commit 90038ef

Please sign in to comment.