Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New module: umitools/group #3765

Merged
merged 4 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions modules/nf-core/umitools/group/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//
// UMITOOLS_GROUP: run `umi_tools group` on an indexed BAM file.
//
// Inputs:
//   meta, bam, bai  - sample meta map, input BAM and its index (the index is staged
//                     next to the BAM; it is not passed on the command line)
//   create_bam      - when true, write a tagged BAM to "${prefix}.bam"
//   get_group_info  - when true, write the per-read group table to "${prefix}.tsv"
//
// Outputs:
//   log       - "${prefix}.log" written via -L (always produced)
//   bam       - "${prefix}.bam" (optional, only with create_bam)
//   tsv       - "${prefix}.tsv" (optional, only with get_group_info)
//   versions  - versions.yml recording the umi_tools version
//
process UMITOOLS_GROUP {
    tag "$meta.id"
    label 'process_medium'

    conda "bioconda::umi_tools=1.1.4"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
        'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"

    input:
    tuple val(meta), path(bam), path(bai)
    val create_bam
    val get_group_info

    output:
    tuple val(meta), path("*.log")        , emit: log
    tuple val(meta), path("${prefix}.bam"), optional: true, emit: bam
    tuple val(meta), path("*.tsv")        , optional: true, emit: tsv
    path "versions.yml"                   , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` is deliberately NOT declared with `def`: the output block above refers to it.
    prefix = task.ext.prefix ?: "${meta.id}"
    // Anything other than an explicit single_end flag in meta is treated as paired-end.
    def paired     = meta.single_end ? "" : "--paired"
    def output_bam = create_bam      ? "--output-bam -S ${prefix}.bam" : ""
    def group_info = get_group_info  ? "--group-out ${prefix}.tsv"     : ""

    // Refuse to write the output BAM under the same name as the staged input BAM.
    if (create_bam && "$bam" == "${prefix}.bam") { error "Input and output names are the same, set prefix in module configuration to disambiguate!" }

    // Supply a fixed random seed unless the caller already passed one through ext.args.
    if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"}
    """
    PYTHONHASHSEED=0 umi_tools \\
        group \\
        -I $bam \\
        $output_bam \\
        -L ${prefix}.log \\
        $group_info \\
        $paired \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
    END_VERSIONS
    """

    stub:
    prefix = task.ext.prefix ?: "${meta.id}"
    // The stub creates every declared output file, regardless of create_bam / get_group_info.
    """
    touch ${prefix}.bam
    touch ${prefix}.log
    touch ${prefix}.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
    END_VERSIONS
    """
}
62 changes: 62 additions & 0 deletions modules/nf-core/umitools/group/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Module metadata for umitools/group: describes the tool, the inputs and the outputs
# of the UMITOOLS_GROUP process.
name: umitools_group
description: Group reads based on their UMI and mapping coordinates
keywords:
  - umitools
  - umi
  - deduplication
  - dedup
  - clustering
tools:
  - umi_tools:
      description: >
        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
        and single cell RNA-Seq cell barcodes
      documentation: https://umi-tools.readthedocs.io/en/latest/
      license: ["MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: |
        BAM file containing reads to be grouped via UMIs.
      pattern: "*.{bam}"
  - bai:
      type: file
      description: |
        BAM index file corresponding to the input BAM file.
      pattern: "*.{bai}"
  - create_bam:
      type: boolean
      description: |
        Whether or not to create a read group tagged BAM file.
  - get_group_info:
      type: boolean
      description: |
        Whether or not to generate the flatfile describing the read groups, see docs for complete info of all columns

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: A read group tagged BAM file.
      pattern: "${prefix}.{bam}"
  - log:
      type: file
      description: File with logging information
      pattern: "*.{log}"
  - tsv:
      type: file
      description: Flatfile describing the read groups, see docs for complete info of all columns
      pattern: "*.{tsv}"

authors:
  - "@Joon-Klaps"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4009,6 +4009,10 @@ umitools/extract:
- modules/nf-core/umitools/extract/**
- tests/modules/nf-core/umitools/extract/**

# Tag "umitools/group": path globs covering the module sources and its test directory.
umitools/group:
- modules/nf-core/umitools/group/**
- tests/modules/nf-core/umitools/group/**

unicycler:
- modules/nf-core/unicycler/**
- tests/modules/nf-core/unicycler/**
Expand Down
96 changes: 96 additions & 0 deletions tests/modules/nf-core/umitools/group/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { UMITOOLS_EXTRACT } from '../../../../../modules/nf-core/umitools/extract/main.nf'
include { BWA_INDEX } from '../../../../../modules/nf-core/bwa/index/main.nf'
include { BWA_MEM } from '../../../../../modules/nf-core/bwa/mem/main.nf'
include { SAMTOOLS_INDEX } from '../../../../../modules/nf-core/samtools/index/main.nf'
include { UMITOOLS_GROUP } from '../../../../../modules/nf-core/umitools/group/main.nf'

//
// Test with no UMI
//
workflow test_umitools_group_no_umi {
    // BAM and BAI are taken directly from the test dataset; no extraction or alignment step runs here.
    sample_meta = [ id:'test']
    bam_file    = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
    bai_file    = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)

    // create_bam = true, get_group_info = true
    UMITOOLS_GROUP ( [ sample_meta, bam_file, bai_file ], true, true )
}

//
// Test with single-end data with BAM false and group info true
//
workflow test_umitools_group_single_end_info {
    reads = [
        [ id:'test', single_end:true ], // meta map
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]
    reference = [
        [ id:'sarscov2'],
        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    ]

    // Extract UMIs, align against the indexed reference, then index the resulting BAM
    UMITOOLS_EXTRACT ( reads )
    BWA_INDEX ( reference )
    BWA_MEM ( UMITOOLS_EXTRACT.out.reads, BWA_INDEX.out.index, true )
    SAMTOOLS_INDEX ( BWA_MEM.out.bam )

    // Pair each BAM with its index on the first tuple element (the meta map)
    ch_bam_bai = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])

    // create_bam = false, get_group_info = true
    UMITOOLS_GROUP ( ch_bam_bai, false, true )
}

//
// Test with paired-end data with BAM true and group info false
//
workflow test_umitools_group_paired_end_bam {
    reads = [
        [ id:'test', single_end:false ], // meta map
        [
            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
        ]
    ]
    reference = [
        [ id:'sarscov2'],
        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    ]

    // Extract UMIs, align against the indexed reference, then index the resulting BAM
    UMITOOLS_EXTRACT ( reads )
    BWA_INDEX ( reference )
    BWA_MEM ( UMITOOLS_EXTRACT.out.reads, BWA_INDEX.out.index, true )
    SAMTOOLS_INDEX ( BWA_MEM.out.bam )

    // Pair each BAM with its index on the first tuple element (the meta map)
    ch_bam_bai = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])

    // create_bam = true, get_group_info = false
    UMITOOLS_GROUP ( ch_bam_bai, true, false )
}

//
// Test with paired-end data BAM true and group info true
//
workflow test_umitools_group_paired_bam_info {
    reads = [
        [ id:'test', single_end:false ], // meta map
        [
            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
        ]
    ]
    reference = [
        [ id:'sarscov2'],
        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    ]

    // Extract UMIs, align against the indexed reference, then index the resulting BAM
    UMITOOLS_EXTRACT ( reads )
    BWA_INDEX ( reference )
    BWA_MEM ( UMITOOLS_EXTRACT.out.reads, BWA_INDEX.out.index, true )
    SAMTOOLS_INDEX ( BWA_MEM.out.bam )

    // Pair each BAM with its index on the first tuple element (the meta map)
    ch_bam_bai = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])

    // create_bam = true, get_group_info = true
    UMITOOLS_GROUP ( ch_bam_bai, true, true )
}
27 changes: 27 additions & 0 deletions tests/modules/nf-core/umitools/group/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
process {

    // Default publish directory: take the last ':'-separated segment of the process name,
    // keep its first '_'-separated token and lower-case it (UMITOOLS_GROUP -> "umitools").
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // Upstream helper processes: publishing switched off
    withName: BWA_INDEX {
        publishDir = [ enabled: false ]
    }

    withName: BWA_MEM {
        publishDir = [ enabled: false ]
    }

    withName: SAMTOOLS_INDEX {
        publishDir = [ enabled: false ]
    }

    withName: UMITOOLS_EXTRACT {
        publishDir = [ enabled: false ]
        ext.args   = '--bc-pattern="NNNN"'
    }

    // Suffix the prefix so the module's output BAM name differs from the staged input BAM name
    withName: UMITOOLS_GROUP {
        ext.prefix = { "${meta.id}.group" }
    }

}
51 changes: 51 additions & 0 deletions tests/modules/nf-core/umitools/group/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Entry test_umitools_group_no_umi: BAM, log, TSV and versions.yml are all expected.
- name: umitools group test_umitools_group_no_umi
  command: nextflow run ./tests/modules/nf-core/umitools/group -entry test_umitools_group_no_umi -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/umitools/group/nextflow.config
  tags:
    - umitools/group
    - umitools
  files:
    - path: output/umitools/test.group.bam
      md5sum: fb90b49a90c2b3e8ddfedd9c95361625
    # The log is matched on content rather than checksum
    - path: output/umitools/test.group.log
      contains:
        - "# UMI-tools version:"
    - path: output/umitools/test.group.tsv
      md5sum: 1a3ccdc00df59fa89c79ad482980a003
    - path: output/umitools/versions.yml

# Entry test_umitools_group_single_end_info: log, TSV and versions.yml are expected; no BAM is listed.
- name: umitools group test_umitools_group_single_end_info
  command: nextflow run ./tests/modules/nf-core/umitools/group -entry test_umitools_group_single_end_info -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/umitools/group/nextflow.config
  tags:
    - umitools/group
    - umitools
  files:
    # The log is matched on content rather than checksum
    - path: output/umitools/test.group.log
      contains:
        - "# UMI-tools version:"
    - path: output/umitools/test.group.tsv
      md5sum: e7d46166eb3d8f42d73032e44f313b71
    - path: output/umitools/versions.yml

# Entry test_umitools_group_paired_end_bam: BAM, log and versions.yml are expected; no TSV is listed.
- name: umitools group test_umitools_group_paired_end_bam
  command: nextflow run ./tests/modules/nf-core/umitools/group -entry test_umitools_group_paired_end_bam -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/umitools/group/nextflow.config
  tags:
    - umitools/group
    - umitools
  files:
    - path: output/umitools/test.group.bam
      md5sum: 8f9250416a7ccbe970f5034826c318f5
    # The log is matched on content rather than checksum
    - path: output/umitools/test.group.log
      contains:
        - "# UMI-tools version:"
    - path: output/umitools/versions.yml

# Entry test_umitools_group_paired_bam_info: BAM, log, TSV and versions.yml are all expected.
- name: umitools group test_umitools_group_paired_bam_info
  command: nextflow run ./tests/modules/nf-core/umitools/group -entry test_umitools_group_paired_bam_info -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/umitools/group/nextflow.config
  tags:
    - umitools/group
    - umitools
  files:
    - path: output/umitools/test.group.bam
      md5sum: 8f9250416a7ccbe970f5034826c318f5
    # The log is matched on content rather than checksum
    - path: output/umitools/test.group.log
      contains:
        - "# UMI-tools version:"
    - path: output/umitools/test.group.tsv
      md5sum: d652eb6570057e9e709e8cac5f43d00c
    - path: output/umitools/versions.yml