\\n Workflow | \\n Nextflow | \\n\\"
+ ],
+ [
+ "CUSTOM_DUMPSOFTWAREVERSIONS:",
+ " python: 3.11.7",
+ " yaml: 5.4.1",
+ "TOOL1:",
+ " tool1: 0.11.9",
+ "TOOL2:",
+ " tool2: '1.9'",
+ "Workflow:"
+ ]
+ ],
+ "timestamp": "2024-01-09T23:01:18.710682"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
new file mode 100644
index 0000000..405aa24
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
@@ -0,0 +1,2 @@
+custom/dumpsoftwareversions:
+ - modules/nf-core/custom/dumpsoftwareversions/**
diff --git a/modules/nf-core/fastavalidator/environment.yml b/modules/nf-core/fastavalidator/environment.yml
new file mode 100644
index 0000000..70f346e
--- /dev/null
+++ b/modules/nf-core/fastavalidator/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "fastavalidator"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::py_fasta_validator=0.6"
diff --git a/modules/nf-core/fastavalidator/main.nf b/modules/nf-core/fastavalidator/main.nf
new file mode 100644
index 0000000..ac5470f
--- /dev/null
+++ b/modules/nf-core/fastavalidator/main.nf
@@ -0,0 +1,62 @@
+process FASTAVALIDATOR { // Validate a FASTA file with py_fasta_validator; emits a success log or an error log
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/py_fasta_validator:0.6--py37h595c7a6_0':
+        'biocontainers/py_fasta_validator:0.6--py37h595c7a6_0' }"
+
+    input:
+    tuple val(meta), path(fasta) // meta: sample map (meta.id is the default output prefix); fasta: file to validate
+    // Both logs are optional because exactly one of them is produced per task.
+    output:
+    tuple val(meta), path('*.success.log')  , emit: success_log , optional: true
+    tuple val(meta), path('*.error.log')    , emit: error_log   , optional: true
+    path "versions.yml"                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+    // A non-zero validator exit is absorbed by '|| echo'; the outcome is decided from the captured stderr.
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    py_fasta_validator \\
+        -f $fasta \\
+        2> "${prefix}.error.log" \\
+        || echo "Errors from fasta_validate printed to ${prefix}.error.log"
+    # Non-empty stderr means failure; -s also catches a diagnostic that lacks a trailing newline.
+    if [ -s "${prefix}.error.log" ]; then
+        echo "Validation failed..."
+
+        cat \\
+            "${prefix}.error.log"
+    else
+        echo "Validation successful..."
+
+        mv \\
+            "${prefix}.error.log" \\
+            fasta_validate.stderr
+
+        echo "Validation successful..." \\
+            > "${prefix}.success.log"
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        py_fasta_validator: \$(py_fasta_validator -v | sed 's/.* version //')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo "Validation successful..." \\
+        > "${prefix}.success.log"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        py_fasta_validator: \$(py_fasta_validator -v | sed 's/.* version //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/fastavalidator/meta.yml b/modules/nf-core/fastavalidator/meta.yml
new file mode 100644
index 0000000..c5c4371
--- /dev/null
+++ b/modules/nf-core/fastavalidator/meta.yml
@@ -0,0 +1,53 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "fastavalidator"
+description: |
+  Python C-extension for a simple validator for fasta files. The module emits a success log when the
+  file is valid, or an error log upon validation failure.
+keywords:
+ - fasta
+ - validation
+ - genome
+tools:
+ - fasta_validate:
+ description: |
+        Python C-extension wrapping simple C code that validates a fasta file. It only checks a few things,
+        and by default only sets its response via the return code,
+        so you will need to check that!
+ homepage: "https://github.com/linsalrob/py_fasta_validator"
+ documentation: "https://github.com/linsalrob/py_fasta_validator"
+ tool_dev_url: "https://github.com/linsalrob/py_fasta_validator"
+ doi: "10.5281/zenodo.5002710"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing file information
+ e.g. [ id:'test' ]
+ - fasta:
+ type: file
+ description: Input fasta file
+ pattern: "*.fasta"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing file information
+ e.g. [ id:'test' ]
+ - success_log:
+ type: file
+ description: Log file for successful validation
+ pattern: "*.success.log"
+ - error_log:
+ type: file
+ description: Log file for failed validation
+ pattern: "*.error.log"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@gallvp"
+maintainers:
+ - "@gallvp"
diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test b/modules/nf-core/fastavalidator/tests/main.nf.test
new file mode 100644
index 0000000..bb8c22c
--- /dev/null
+++ b/modules/nf-core/fastavalidator/tests/main.nf.test
@@ -0,0 +1,60 @@
+nextflow_process {
+    // nf-test suite for FASTAVALIDATOR: one valid FASTA input and one invalid (GFF3) input.
+    name "Test Process FASTAVALIDATOR"
+    script "../main.nf"
+    process "FASTAVALIDATOR"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "fastavalidator"
+
+    test("sarscov2-fasta-valid") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+        // Outputs are lists (empty when nothing was emitted), so sizes are asserted rather than null-ness.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert process.out.success_log.size() == 1 },
+                { assert process.out.error_log == [] },
+                { assert path(process.out.success_log.get(0).get(1)).getText().contains("Validation successful...") }
+            )
+        }
+
+    }
+
+    test("sarscov2-gff3-invalid") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+        // An invalid input is reported through error_log; the task itself is still expected to succeed.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert process.out.success_log == [] },
+                { assert process.out.error_log.size() == 1 },
+                { assert path(process.out.error_log.get(0).get(1)).getText().contains("genome.gff3 does not start with a >") }
+            )
+        }
+
+    }
+}
diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test.snap b/modules/nf-core/fastavalidator/tests/main.nf.test.snap
new file mode 100644
index 0000000..382dee7
--- /dev/null
+++ b/modules/nf-core/fastavalidator/tests/main.nf.test.snap
@@ -0,0 +1,76 @@
+{
+ "sarscov2-fasta-valid": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.success.log:md5,b0b859eda1db5cd43915846e00ebc22c"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+ "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7"
+ ],
+ "error_log": [
+
+ ],
+ "success_log": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.success.log:md5,b0b859eda1db5cd43915846e00ebc22c"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7"
+ ]
+ }
+ ],
+ "timestamp": "2023-11-28T11:23:25.106872"
+ },
+ "sarscov2-gff3-invalid": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.error.log:md5,531d520c0e7767176f743f197f1f87b3"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7"
+ ],
+ "error_log": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.error.log:md5,531d520c0e7767176f743f197f1f87b3"
+ ]
+ ],
+ "success_log": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7"
+ ]
+ }
+ ],
+ "timestamp": "2023-11-28T11:23:29.40324"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastavalidator/tests/tags.yml b/modules/nf-core/fastavalidator/tests/tags.yml
new file mode 100644
index 0000000..c3c7757
--- /dev/null
+++ b/modules/nf-core/fastavalidator/tests/tags.yml
@@ -0,0 +1,2 @@
+fastavalidator:
+ - "modules/nf-core/fastavalidator/**"
diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml
new file mode 100644
index 0000000..70389e6
--- /dev/null
+++ b/modules/nf-core/fastp/environment.yml
@@ -0,0 +1,7 @@
+name: fastp
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::fastp=0.23.4
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
index ee38e1d..2a3b679 100644
--- a/modules/nf-core/fastp/main.nf
+++ b/modules/nf-core/fastp/main.nf
@@ -2,7 +2,7 @@ process FASTP {
tag "$meta.id"
label 'process_medium'
- conda "bioconda::fastp=0.23.4"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' :
'biocontainers/fastp:0.23.4--h5f740d0_0' }"
@@ -45,7 +45,7 @@ process FASTP {
$adapter_list \\
$fail_fastq \\
$args \\
- 2> ${prefix}.fastp.log \\
+ 2> >(tee ${prefix}.fastp.log >&2) \\
| gzip -c > ${prefix}.fastp.fastq.gz
cat <<-END_VERSIONS > versions.yml
@@ -66,7 +66,7 @@ process FASTP {
$adapter_list \\
$fail_fastq \\
$args \\
- 2> ${prefix}.fastp.log
+ 2> >(tee ${prefix}.fastp.log >&2)
cat <<-END_VERSIONS > versions.yml
"${task.process}":
@@ -91,7 +91,7 @@ process FASTP {
--thread $task.cpus \\
--detect_adapter_for_pe \\
$args \\
- 2> ${prefix}.fastp.log
+ 2> >(tee ${prefix}.fastp.log >&2)
cat <<-END_VERSIONS > versions.yml
"${task.process}":
@@ -101,13 +101,17 @@ process FASTP {
}
stub:
- def prefix = task.ext.prefix ?: "${meta.id}"
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end
+ def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz"
+ def touch_merged = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : ""
"""
- touch "${prefix}.fastp.fastq.gz"
- touch "${prefix}.json"
- touch "${prefix}.html"
- touch "${prefix}.log"
-
+ touch $touch_reads
+ touch "${prefix}.fastp.json"
+ touch "${prefix}.fastp.html"
+ touch "${prefix}.fastp.log"
+ $touch_merged
+
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml
index 1c34ff9..c22a16a 100644
--- a/modules/nf-core/fastp/meta.yml
+++ b/modules/nf-core/fastp/meta.yml
@@ -33,7 +33,6 @@ input:
- save_merged:
type: boolean
description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz`
-
output:
- meta:
type: map
@@ -71,4 +70,6 @@ output:
authors:
- "@drpatelh"
- "@kevinmenden"
- - "@gallvp"
+maintainers:
+ - "@drpatelh"
+ - "@kevinmenden"
diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test
new file mode 100644
index 0000000..17dce8a
--- /dev/null
+++ b/modules/nf-core/fastp/tests/main.nf.test
@@ -0,0 +1,726 @@
+nextflow_process {
+
+ name "Test Process FASTP"
+ script "../main.nf"
+ process "FASTP"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "fastp"
+
+ test("test_fastp_single_end") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = [
+ [ id:'test', single_end:true ],
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+ ]
+
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases:12.922000 K (92.984097%)",
+ "single end (151 cycles)" ]
+ def log_text = [ "Q20 bases: 12922(92.9841%)",
+ "reads passed filter: 99" ]
+ def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { assert snapshot(process.out.json).match("test_fastp_single_end_json") },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_single_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_single_end-stub") {
+
+ options '-stub'
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = [
+ [ id:'test', single_end:true ],
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+ ]
+
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_single_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+ ]
+
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
+ "The input has little adapter percentage (~0.000000%), probably it's trimmed before."]
+ def log_text = [ "No adapter detected for read1",
+ "Q30 bases: 12281(88.3716%)"]
+ def json_text = ['"passed_filter_reads": 198']
+ def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { json_text.each { json_part ->
+ { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+ }
+ },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_paired_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end-stub") {
+
+ options '-stub'
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+ ]
+
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_paired_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("fastp test_fastp_interleaved") {
+ config './nextflow.config'
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = [ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ]
+ ]
+
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
+ "paired end (151 cycles + 151 cycles)"]
+ def log_text = [ "Q20 bases: 12922(92.9841%)",
+ "reads passed filter: 198"]
+ def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_interleaved-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("fastp test_fastp_interleaved-stub") {
+
+ options '-stub'
+
+ config './nextflow.config'
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = [ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ]
+ ]
+
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_interleaved-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_single_end_trim_fail") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = true
+ save_merged = false
+
+ input[0] = [ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+ ]
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases: | 12.922000 K (92.984097%)",
+ "single end (151 cycles)"]
+ def log_text = [ "Q20 bases: 12922(92.9841%)",
+ "reads passed filter: 99" ]
+ def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
+ }
+ },
+ { failed_read_lines.each { failed_read_line ->
+ { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_trim_fail") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = true
+ save_merged = false
+
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
+ "The input has little adapter percentage (~0.000000%), probably it's trimmed before."]
+ def log_text = [ "No adapter detected for read1",
+ "Q30 bases: 12281(88.3716%)"]
+ def json_text = ['"passed_filter_reads": 198']
+ def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { failed_read2_lines.each { failed_read2_line ->
+ { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { json_text.each { json_part ->
+ { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+ }
+ },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = true
+
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+ ]
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ ""]
+ def log_text = [ "Merged and filtered:",
+ "total reads: 75",
+ "total bases: 13683"]
+ def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683']
+ def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1",
+ "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC",
+ "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { read_merged_lines.each { read_merged_line ->
+ { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { json_text.each { json_part ->
+ { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+ }
+ },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_paired_end_merged-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged-stub") {
+
+ options '-stub'
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = true
+
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+ ]
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_paired_end_merged-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged_adapterlist") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true)
+ save_trimmed_fail = false
+ save_merged = true
+
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+ ]
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ ""]
+ def log_text = [ "Merged and filtered:",
+ "total reads: 75",
+ "total bases: 13683"]
+ def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"]
+ def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1",
+ "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC",
+ "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { read_merged_lines.each { read_merged_line ->
+ { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { json_text.each { json_part ->
+ { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+ }
+ },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap
new file mode 100644
index 0000000..1b7d241
--- /dev/null
+++ b/modules/nf-core/fastp/tests/main.nf.test.snap
@@ -0,0 +1,107 @@
+{
+ "test_fastp_paired_end-for_stub_match": {
+ "content": [
+ [
+ [
+ "test_1.fastp.fastq.gz",
+ "test_2.fastp.fastq.gz"
+ ],
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=false}"
+ ]
+ ],
+ "timestamp": "2023-12-21T09:44:37.202512"
+ },
+ "fastp test_fastp_interleaved_json": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4"
+ ]
+ ]
+ ],
+ "timestamp": "2023-10-17T11:04:45.794175881"
+ },
+ "test_fastp_paired_end_merged-for_stub_match": {
+ "content": [
+ [
+ [
+ "test_1.fastp.fastq.gz",
+ "test_2.fastp.fastq.gz"
+ ],
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "test.merged.fastq.gz",
+ "{id=test, single_end=false}"
+ ]
+ ],
+ "timestamp": "2023-12-21T09:53:45.237014"
+ },
+ "test_fastp_single_end_json": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc"
+ ]
+ ]
+ ],
+ "timestamp": "2023-10-17T11:04:10.566343705"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "timestamp": "2023-10-17T11:04:10.582076024"
+ },
+ "test_fastp_interleaved-for_stub_match": {
+ "content": [
+ [
+ "test.fastp.fastq.gz",
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=true}"
+ ]
+ ],
+ "timestamp": "2023-12-21T09:48:43.148485"
+ },
+ "test_fastp_single_end-for_stub_match": {
+ "content": [
+ [
+ "test.fastp.fastq.gz",
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=true}"
+ ]
+ ],
+ "timestamp": "2023-12-21T09:20:07.254788"
+ },
+ "test_fastp_single_end_trim_fail_json": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5"
+ ]
+ ]
+ ],
+ "timestamp": "2023-10-17T11:05:00.379878948"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.config
new file mode 100644
index 0000000..0f7849a
--- /dev/null
+++ b/modules/nf-core/fastp/tests/nextflow.config
@@ -0,0 +1,6 @@
+process { // test-only process configuration for the fastp module tests
+
+ withName: FASTP { // applies only to the process named FASTP
+ ext.args = "--interleaved_in" // value exposed to the module as task.ext.args
+ }
+}
diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml
new file mode 100644
index 0000000..c1afcce
--- /dev/null
+++ b/modules/nf-core/fastp/tests/tags.yml
@@ -0,0 +1,2 @@
+fastp:
+ - modules/nf-core/fastp/**
diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml
new file mode 100644
index 0000000..1787b38
--- /dev/null
+++ b/modules/nf-core/fastqc/environment.yml
@@ -0,0 +1,7 @@
+name: fastqc
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::fastqc=0.12.1
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
index 249f906..9e19a74 100644
--- a/modules/nf-core/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -2,10 +2,10 @@ process FASTQC {
tag "$meta.id"
label 'process_medium'
- conda "bioconda::fastqc=0.11.9"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' :
- 'biocontainers/fastqc:0.11.9--0' }"
+ 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
+ 'biocontainers/fastqc:0.12.1--hdfd78af_0' }"
input:
tuple val(meta), path(reads)
@@ -37,7 +37,7 @@ process FASTQC {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+ fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
END_VERSIONS
"""
@@ -49,7 +49,7 @@ process FASTQC {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+ fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
END_VERSIONS
"""
}
diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml
index 4da5bb5..ee5507e 100644
--- a/modules/nf-core/fastqc/meta.yml
+++ b/modules/nf-core/fastqc/meta.yml
@@ -50,3 +50,8 @@ authors:
- "@grst"
- "@ewels"
- "@FelixKrueger"
+maintainers:
+ - "@drpatelh"
+ - "@grst"
+ - "@ewels"
+ - "@FelixKrueger"
diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test
index 3961de6..ad9bc54 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test
+++ b/modules/nf-core/fastqc/tests/main.nf.test
@@ -1,32 +1,220 @@
nextflow_process {
name "Test Process FASTQC"
- script "modules/nf-core/fastqc/main.nf"
+ script "../main.nf"
process "FASTQC"
+
+ tag "modules"
+ tag "modules_nfcore"
tag "fastqc"
- test("Single-Read") {
+ test("sarscov2 single-end [fastq]") {
when {
process {
"""
input[0] = [
- [ id: 'test', single_end:true ],
- [
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
- ]
+ [ id: 'test', single_end:true ],
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ]
]
"""
}
}
then {
- assert process.success
- assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html"
- assert path(process.out.html.get(0).get(1)).getText().contains("File type | Conventional base calls | ")
- assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip"
+ assertAll (
+ { assert process.success },
+
+ // NOTE: The report contains the date inside it, so its md5sum is stable for a single day only and cannot be used in a snapshot.
+ // For what the embedded date looks like, see:
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039
+
+ { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 paired-end [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [id: 'test', single_end: false], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
+ { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
+ { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
+ { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
+ { assert path(process.out.html[0][1][0]).text.contains("File type | Conventional base calls | ") },
+ { assert path(process.out.html[0][1][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 interleaved [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [id: 'test', single_end: false], // meta map
+ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 paired-end [bam]") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [id: 'test', single_end: false], // meta map
+ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 multiple [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [id: 'test', single_end: false], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
+ { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
+ { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" },
+ { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" },
+ { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
+ { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
+ { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" },
+ { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" },
+ { assert path(process.out.html[0][1][0]).text.contains("File type | Conventional base calls | ") },
+ { assert path(process.out.html[0][1][1]).text.contains("File type | Conventional base calls | ") },
+ { assert path(process.out.html[0][1][2]).text.contains("File type | Conventional base calls | ") },
+ { assert path(process.out.html[0][1][3]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 custom_prefix") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'mysample', single_end:true ], // meta map
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+ }
+
+ test("sarscov2 single-end [fastq] - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id: 'test', single_end:true ],
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ """
+ }
}
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out.html.collect { file(it[1]).getName() } +
+ process.out.zip.collect { file(it[1]).getName() } +
+ process.out.versions ).match() }
+ )
+ }
}
}
diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap
new file mode 100644
index 0000000..5ef5afb
--- /dev/null
+++ b/modules/nf-core/fastqc/tests/main.nf.test.snap
@@ -0,0 +1,20 @@
+{
+ "sarscov2 single-end [fastq] - stub": {
+ "content": [
+ [
+ "test.html",
+ "test.zip",
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "timestamp": "2023-12-29T02:48:05.126117287"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "timestamp": "2023-12-29T02:46:49.507942667"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml
new file mode 100644
index 0000000..7834294
--- /dev/null
+++ b/modules/nf-core/fastqc/tests/tags.yml
@@ -0,0 +1,2 @@
+fastqc:
+ - modules/nf-core/fastqc/**
diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml
new file mode 100644
index 0000000..5398f71
--- /dev/null
+++ b/modules/nf-core/gffread/environment.yml
@@ -0,0 +1,7 @@
+name: gffread
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::gffread=0.12.1
diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf
new file mode 100644
index 0000000..d8a473e
--- /dev/null
+++ b/modules/nf-core/gffread/main.nf
@@ -0,0 +1,35 @@
+process GFFREAD { // Runs gffread on a single annotation file and records the tool version
+ tag "$gff"
+ label 'process_low'
+
+ conda "${moduleDir}/environment.yml" // conda dependencies are read from the module's environment.yml
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' :
+ 'biocontainers/gffread:0.12.1--h8b12597_0' }" // depot.galaxyproject.org image for singularity (unless ext.singularity_pull_docker_container is set), biocontainers image otherwise
+
+ input:
+ path gff // annotation file handed to gffread as its positional argument
+
+ output:
+ path "*.gtf" , emit: gtf , optional: true // expected when ext.args contains "-T" (see 'extension' below)
+ path "*.gff3" , emit: gffread_gff , optional: true // expected otherwise; the file is named <prefix>.gffread.gff3
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when // runs when ext.when is unset or truthy
+
+ script:
+ def args = task.ext.args ?: '' // extra gffread command-line arguments; empty when ext.args is unset
+ def prefix = task.ext.prefix ?: "${gff.baseName}" // output file stem; falls back to the input's base name
+ def extension = args.contains("-T") ? 'gtf' : 'gffread.gff3' // output suffix is chosen purely by whether args contains "-T"
+ """
+ gffread \\
+ $gff \\
+ $args \\
+ -o ${prefix}.${extension}
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gffread: \$(gffread --version 2>&1)
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml
new file mode 100644
index 0000000..27ac310
--- /dev/null
+++ b/modules/nf-core/gffread/meta.yml
@@ -0,0 +1,36 @@
+name: gffread
+description: Validate, filter, convert and perform various other operations on GFF files
+keywords:
+ - gff
+ - conversion
+ - validation
+tools:
+ - gffread:
+ description: GFF/GTF utility providing format conversions, region filtering, FASTA sequence extraction and more.
+ homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
+ documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
+ tool_dev_url: https://github.com/gpertea/gffread
+ doi: 10.12688/f1000research.23297.1
+ licence: ["MIT"]
+input:
+ - gff:
+ type: file
+ description: A reference file in either the GFF3, GFF2 or GTF format.
+ pattern: "*.{gff, gtf}"
+output:
+ - gtf:
+ type: file
+ description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present
+ pattern: "*.{gtf}"
+ - gffread_gff:
+ type: file
+ description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent
+ pattern: "*.{gff3}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@emiller88"
+maintainers:
+ - "@emiller88"
diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test
new file mode 100644
index 0000000..3c064b3
--- /dev/null
+++ b/modules/nf-core/gffread/tests/main.nf.test
@@ -0,0 +1,61 @@
+nextflow_process { // nf-test suite for the GFFREAD process
+
+ name "Test Process GFFREAD"
+ script "../main.nf"
+ process "GFFREAD"
+
+ tag "gffread"
+ tag "modules_nfcore"
+ tag "modules"
+
+ test("sarscov2-gff3-gtf") { // GFF3 input with the test-local config applied; GTF output expected
+
+ config "./nextflow.config" // test-local config file, loaded for this test only
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }, // compares the whole process.out against the stored snapshot
+ { assert process.out.gtf != null },
+ { assert process.out.gffread_gff == [] } // the GFF3 channel must be empty in this test
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-gff3") { // same input, but no test-level config is declared here; GFF3 output expected
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }, // compares the whole process.out against the stored snapshot
+ { assert process.out.gtf == [] }, // the GTF channel must be empty in this test
+ { assert process.out.gffread_gff != null },
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap
new file mode 100644
index 0000000..1f1342e
--- /dev/null
+++ b/modules/nf-core/gffread/tests/main.nf.test.snap
@@ -0,0 +1,52 @@
+{
+ "sarscov2-gff3-gtf": {
+ "content": [
+ {
+ "0": [
+ "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3"
+ ],
+ "1": [
+
+ ],
+ "2": [
+ "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+ ],
+ "gffread_gff": [
+
+ ],
+ "gtf": [
+ "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3"
+ ],
+ "versions": [
+ "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+ ]
+ }
+ ],
+ "timestamp": "2023-11-29T15:39:30.006985"
+ },
+ "sarscov2-gff3-gff3": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d"
+ ],
+ "2": [
+ "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+ ],
+ "gffread_gff": [
+ "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d"
+ ],
+ "gtf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+ ]
+ }
+ ],
+ "timestamp": "2023-11-29T15:39:34.636061"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gffread/tests/nextflow.config b/modules/nf-core/gffread/tests/nextflow.config
new file mode 100644
index 0000000..74b2509
--- /dev/null
+++ b/modules/nf-core/gffread/tests/nextflow.config
@@ -0,0 +1,5 @@
+process { // test-only process configuration for the gffread module tests
+ withName: GFFREAD { // applies only to the process named GFFREAD
+ ext.args = '-T' // the module switches its output extension to 'gtf' when args contains "-T"
+ }
+}
diff --git a/modules/nf-core/gffread/tests/tags.yml b/modules/nf-core/gffread/tests/tags.yml
new file mode 100644
index 0000000..0557606
--- /dev/null
+++ b/modules/nf-core/gffread/tests/tags.yml
@@ -0,0 +1,2 @@
+gffread:
+ - modules/nf-core/gffread/**
diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml
new file mode 100644
index 0000000..25910b3
--- /dev/null
+++ b/modules/nf-core/gunzip/environment.yml
@@ -0,0 +1,7 @@
+name: gunzip
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - conda-forge::sed=4.7
diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf
index 73bf08c..468a6f2 100644
--- a/modules/nf-core/gunzip/main.nf
+++ b/modules/nf-core/gunzip/main.nf
@@ -2,7 +2,7 @@ process GUNZIP {
tag "$archive"
label 'process_single'
- conda "conda-forge::sed=4.7"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
'nf-core/ubuntu:20.04' }"
diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml
index 4cdcdf4..231034f 100644
--- a/modules/nf-core/gunzip/meta.yml
+++ b/modules/nf-core/gunzip/meta.yml
@@ -33,3 +33,7 @@ authors:
- "@joseespinosa"
- "@drpatelh"
- "@jfy133"
+maintainers:
+ - "@joseespinosa"
+ - "@drpatelh"
+ - "@jfy133"
diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test
new file mode 100644
index 0000000..d031792
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/main.nf.test
@@ -0,0 +1,35 @@
+nextflow_process { // nf-test suite for the GUNZIP process
+
+ name "Test Process GUNZIP"
+ script "../main.nf"
+ process "GUNZIP"
+ tag "gunzip"
+ tag "modules_nfcore"
+ tag "modules"
+
+ test("Should run without failures") { // single test: one gzipped FASTQ paired with an empty meta value
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [],
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() } // compares the whole process.out against the stored snapshot
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap
new file mode 100644
index 0000000..720fd9f
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/main.nf.test.snap
@@ -0,0 +1,31 @@
+{
+ "Should run without failures": {
+ "content": [
+ {
+ "0": [
+ [
+ [
+
+ ],
+ "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+ ],
+ "gunzip": [
+ [
+ [
+
+ ],
+ "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+ ]
+ }
+ ],
+ "timestamp": "2023-10-17T15:35:37.690477896"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml
new file mode 100644
index 0000000..fd3f691
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/tags.yml
@@ -0,0 +1,2 @@
+gunzip:
+ - modules/nf-core/gunzip/**
diff --git a/modules/nf-core/samtools/cat/environment.yml b/modules/nf-core/samtools/cat/environment.yml
new file mode 100644
index 0000000..0455a7d
--- /dev/null
+++ b/modules/nf-core/samtools/cat/environment.yml
@@ -0,0 +1,7 @@
+name: samtools_cat
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::samtools=1.18
diff --git a/modules/nf-core/samtools/cat/main.nf b/modules/nf-core/samtools/cat/main.nf
index 22a63e2..b3b2508 100644
--- a/modules/nf-core/samtools/cat/main.nf
+++ b/modules/nf-core/samtools/cat/main.nf
@@ -2,10 +2,10 @@ process SAMTOOLS_CAT {
tag "$meta.id"
label 'process_low'
- conda "bioconda::samtools=1.17"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
- 'biocontainers/samtools:1.17--h00cdaf9_0' }"
+ 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' :
+ 'biocontainers/samtools:1.18--h50ea8bc_1' }"
input:
tuple val(meta), path(input_files, stageAs: "?/*")
diff --git a/modules/nf-core/samtools/cat/meta.yml b/modules/nf-core/samtools/cat/meta.yml
index 42632e7..3541e0c 100644
--- a/modules/nf-core/samtools/cat/meta.yml
+++ b/modules/nf-core/samtools/cat/meta.yml
@@ -47,3 +47,5 @@ output:
pattern: "versions.yml"
authors:
- "@matthdsm"
+maintainers:
+ - "@matthdsm"
diff --git a/modules/nf-core/samtools/cat/tests/main.nf.test b/modules/nf-core/samtools/cat/tests/main.nf.test
new file mode 100644
index 0000000..49c633f
--- /dev/null
+++ b/modules/nf-core/samtools/cat/tests/main.nf.test
@@ -0,0 +1,72 @@
+nextflow_process { // nf-test suite for the SAMTOOLS_CAT process
+
+ name "Test Process SAMTOOLS_CAT"
+ script "../main.nf"
+ process "SAMTOOLS_CAT"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "samtools"
+ tag "samtools/cat"
+
+ test("sarscov2 - [bam1, bam2]") { // two BAM files supplied under one meta map
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_unaligned_bam'], checkIfExists: true)
+ ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.bam[0][1]).name, // only the BAM file name is snapshotted, not its checksum
+ process.out.cram,
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [bam1, bam2] - stub") { // same inputs and assertions as the test above, run with "-stub"
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_unaligned_bam'], checkIfExists: true)
+ ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.bam[0][1]).name, // only the BAM file name is snapshotted, not its checksum
+ process.out.cram,
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/samtools/cat/tests/main.nf.test.snap b/modules/nf-core/samtools/cat/tests/main.nf.test.snap
new file mode 100644
index 0000000..298e25d
--- /dev/null
+++ b/modules/nf-core/samtools/cat/tests/main.nf.test.snap
@@ -0,0 +1,26 @@
+{
+ "sarscov2 - [bam1, bam2]": {
+ "content": [
+ "test.bam",
+ [
+
+ ],
+ [
+ "versions.yml:md5,f10a4f6b2e0272bef2ceb4ca826a15a1"
+ ]
+ ],
+ "timestamp": "2023-12-04T14:00:18.264348819"
+ },
+ "sarscov2 - [bam1, bam2] - stub": {
+ "content": [
+ "test.bam",
+ [
+
+ ],
+ [
+ "versions.yml:md5,f10a4f6b2e0272bef2ceb4ca826a15a1"
+ ]
+ ],
+ "timestamp": "2023-12-04T14:03:17.714482742"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/cat/tests/tags.yml b/modules/nf-core/samtools/cat/tests/tags.yml
new file mode 100644
index 0000000..9760557
--- /dev/null
+++ b/modules/nf-core/samtools/cat/tests/tags.yml
@@ -0,0 +1,2 @@
+samtools/cat:
+ - "modules/nf-core/samtools/cat/**"
diff --git a/modules/nf-core/sortmerna/environment.yml b/modules/nf-core/sortmerna/environment.yml
new file mode 100644
index 0000000..f40f995
--- /dev/null
+++ b/modules/nf-core/sortmerna/environment.yml
@@ -0,0 +1,7 @@
+name: sortmerna
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::sortmerna=4.3.6
diff --git a/modules/nf-core/sortmerna/main.nf b/modules/nf-core/sortmerna/main.nf
index 5b4fbca..29c640c 100644
--- a/modules/nf-core/sortmerna/main.nf
+++ b/modules/nf-core/sortmerna/main.nf
@@ -1,11 +1,11 @@
process SORTMERNA {
tag "$meta.id"
- label "process_high"
+ label 'process_high'
- conda "bioconda::sortmerna=4.3.4"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/sortmerna:4.3.4--h9ee0642_0' :
- 'biocontainers/sortmerna:4.3.4--h9ee0642_0' }"
+ 'https://depot.galaxyproject.org/singularity/sortmerna:4.3.6--h9ee0642_0' :
+ 'biocontainers/sortmerna:4.3.6--h9ee0642_0' }"
input:
tuple val(meta), path(reads)
diff --git a/modules/nf-core/sortmerna/meta.yml b/modules/nf-core/sortmerna/meta.yml
index 66f00de..de0b18e 100644
--- a/modules/nf-core/sortmerna/meta.yml
+++ b/modules/nf-core/sortmerna/meta.yml
@@ -48,4 +48,6 @@ output:
authors:
- "@drpatelh"
- "@mashehu"
- - "@gallvp"
+maintainers:
+ - "@drpatelh"
+ - "@mashehu"
diff --git a/modules/nf-core/sortmerna/tests/main.nf.test b/modules/nf-core/sortmerna/tests/main.nf.test
new file mode 100644
index 0000000..8a01e2a
--- /dev/null
+++ b/modules/nf-core/sortmerna/tests/main.nf.test
@@ -0,0 +1,144 @@
+nextflow_process { // nf-test suite for the SORTMERNA process
+
+ name "Test Process SORTMERNA"
+ script "../main.nf"
+ process "SORTMERNA"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "sortmerna"
+
+ test("sarscov2 single_end") { // real run, one FASTQ file
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+ ]
+ input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.reads },
+ { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 100 (100.00)") }, // content check on the log file
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } + // file names only
+ process.out.log.collect { file(it[1]).getName() } // file names only
+ ).sort()
+ ).match("sarscov2 single_end-for_stub_match") // same snapshot name as the single_end stub test
+ },
+ { assert snapshot(process.out.versions).match("versions") } // "versions" snapshot name is used by every test in this file
+ )
+ }
+
+ }
+
+ test("sarscov2 single_end stub") { // stub run of the single_end case
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+ ]
+ input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } + // file names only
+ process.out.log.collect { file(it[1]).getName() } // file names only
+ ).sort()
+ ).match("sarscov2 single_end-for_stub_match") // same snapshot name as the non-stub single_end test
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("sarscov2 paired_end") { // real run, two FASTQ files
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+ ]
+ input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.reads },
+ { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 200 (100.00)") }, // content check on the log file
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + // it[1] is iterated here, yielding a nested list of names
+ process.out.log.collect { file(it[1]).getName() } // file names only
+ ).sort()
+ ).match("sarscov2 paired_end-for_stub_match") // same snapshot name as the paired_end stub test
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("sarscov2 paired_end stub") { // stub run of the paired_end case
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+ ]
+ input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + // it[1] is iterated here, yielding a nested list of names
+ process.out.log.collect { file(it[1]).getName() } // file names only
+ ).sort()
+ ).match("sarscov2 paired_end-for_stub_match") // same snapshot name as the non-stub paired_end test
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/sortmerna/tests/main.nf.test.snap b/modules/nf-core/sortmerna/tests/main.nf.test.snap
new file mode 100644
index 0000000..e502000
--- /dev/null
+++ b/modules/nf-core/sortmerna/tests/main.nf.test.snap
@@ -0,0 +1,33 @@
+{
+ "sarscov2 single_end-for_stub_match": {
+ "content": [
+ [
+ "test.non_rRNA.fastq.gz",
+ "test.sortmerna.log",
+ "{id=test, single_end=true}"
+ ]
+ ],
+ "timestamp": "2023-12-21T11:56:00.15356"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b"
+ ]
+ ],
+ "timestamp": "2023-12-21T11:56:00.200244"
+ },
+ "sarscov2 paired_end-for_stub_match": {
+ "content": [
+ [
+ [
+ "test_1.non_rRNA.fastq.gz",
+ "test_2.non_rRNA.fastq.gz"
+ ],
+ "test.sortmerna.log",
+ "{id=test, single_end=false}"
+ ]
+ ],
+ "timestamp": "2023-12-21T12:00:47.879193"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/sortmerna/tests/tags.yml b/modules/nf-core/sortmerna/tests/tags.yml
new file mode 100644
index 0000000..e088480
--- /dev/null
+++ b/modules/nf-core/sortmerna/tests/tags.yml
@@ -0,0 +1,2 @@
+sortmerna:
+ - modules/nf-core/sortmerna/**
diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml
new file mode 100644
index 0000000..36fcd02
--- /dev/null
+++ b/modules/nf-core/star/align/environment.yml
@@ -0,0 +1,9 @@
+name: star_align
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::star=2.7.10a
+ - bioconda::samtools=1.18
+ - conda-forge::gawk=5.1.0
diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf
index d0e2038..8e9c48b 100644
--- a/modules/nf-core/star/align/main.nf
+++ b/modules/nf-core/star/align/main.nf
@@ -2,10 +2,10 @@ process STAR_ALIGN {
tag "$meta.id"
label 'process_high'
- conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' :
- 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }"
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' :
+ 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }"
input:
tuple val(meta), path(reads, stageAs: "input*/*")
diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml
index 3d8fed0..e80dbb7 100644
--- a/modules/nf-core/star/align/meta.yml
+++ b/modules/nf-core/star/align/meta.yml
@@ -52,7 +52,6 @@ input:
- seq_center:
type: string
description: Sequencing center
-
output:
- bam:
type: file
@@ -106,8 +105,11 @@ output:
type: file
description: STAR output bedGraph format file(s) (optional)
pattern: "*.bg"
-
authors:
- "@kevinmenden"
- "@drpatelh"
- "@praveenraj2018"
+maintainers:
+ - "@kevinmenden"
+ - "@drpatelh"
+ - "@praveenraj2018"
diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test
new file mode 100644
index 0000000..4c87847
--- /dev/null
+++ b/modules/nf-core/star/align/tests/main.nf.test
@@ -0,0 +1,339 @@
+nextflow_process {
+
+ name "Test Process STAR_ALIGN"
+ script "../main.nf"
+ process "STAR_ALIGN"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "star"
+ tag "star/align"
+
+ test("homo_sapiens - single_end") {
+ config "./nextflow.config"
+
+ setup {
+ run("STAR_GENOMEGENERATE") {
+ script "../../../star/genomegenerate/main.nf"
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true) ]
+ ])
+ input[1] = STAR_GENOMEGENERATE.out.index
+ input[2] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ input[3] = false
+ input[4] = 'illumina'
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - single_end - log_final") },
+ { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - single_end - log_out") },
+ { assert snapshot(process.out.bam).match("homo_sapiens - single_end - bam") },
+ { assert snapshot(process.out.bam_sorted).match("homo_sapiens - single_end - bam_sorted") },
+ { assert snapshot(process.out.bam_transcript).match("homo_sapiens - single_end - bam_transcript") },
+ { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - single_end - bam_unsorted") },
+ { assert snapshot(process.out.bedgraph).match("homo_sapiens - single_end - bedgraph") },
+ { assert snapshot(process.out.fastq).match("homo_sapiens - single_end - fastq") },
+ { assert snapshot(process.out.junction).match("homo_sapiens - single_end - junction") },
+ { assert snapshot(process.out.log_progress).match("homo_sapiens - single_end - log_progress") },
+ { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - single_end - read_per_gene_tab") },
+ { assert snapshot(process.out.sam).match("homo_sapiens - single_end - sam") },
+ { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - single_end - spl_junc_tab") },
+ { assert snapshot(process.out.tab).match("homo_sapiens - single_end - tab") },
+ { assert snapshot(process.out.wig).match("homo_sapiens - single_end - wig") },
+ { assert snapshot(process.out.versions).match("homo_sapiens - single_end - versions") }
+ )
+ }
+ }
+
+ test("homo_sapiens - paired_end") {
+ config "./nextflow.config"
+
+ setup {
+ run("STAR_GENOMEGENERATE") {
+ script "../../../star/genomegenerate/main.nf"
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
+ ]
+ ])
+ input[1] = STAR_GENOMEGENERATE.out.index
+ input[2] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ input[3] = false
+ input[4] = 'illumina'
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - log_final") },
+ { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - log_out") },
+ { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - bam") },
+ { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - bam_sorted") },
+ { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - bam_transcript") },
+ { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - bam_unsorted") },
+ { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - bedgraph") },
+ { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - fastq") },
+ { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - junction") },
+ { assert snapshot(process.out.log_progress).match("homo_sapiens - paired_end - log_progress") },
+ { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - read_per_gene_tab") },
+ { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - sam") },
+ { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - spl_junc_tab") },
+ { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - tab") },
+ { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - wig") },
+ { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - versions") }
+ )
+ }
+ }
+
+ test("homo_sapiens - paired_end - arriba") {
+ config "./nextflow.arriba.config"
+
+ setup {
+ run("STAR_GENOMEGENERATE") {
+ script "../../../star/genomegenerate/main.nf"
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
+ ]
+ ])
+ input[1] = STAR_GENOMEGENERATE.out.index
+ input[2] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ input[3] = false
+ input[4] = 'illumina'
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - arriba - log_final") },
+ { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - arriba - log_out") },
+ { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - arriba - log_progress") },
+ { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - arriba - bam") },
+ { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - arriba - bam_sorted") },
+ { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - arriba - bam_transcript") },
+ { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - arriba - bam_unsorted") },
+ { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - arriba - bedgraph") },
+ { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - arriba - fastq") },
+ { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - arriba - junction") },
+ { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - arriba - read_per_gene_tab") },
+ { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - arriba - sam") },
+ { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - arriba - spl_junc_tab") },
+ { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - arriba - tab") },
+ { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - arriba - wig") },
+ { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - arriba - versions") }
+ )
+ }
+ }
+
+ test("homo_sapiens - paired_end - starfusion") {
+ config "./nextflow.starfusion.config"
+
+ setup {
+ run("STAR_GENOMEGENERATE") {
+ script "../../../star/genomegenerate/main.nf"
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
+ ]
+ ])
+ input[1] = STAR_GENOMEGENERATE.out.index
+ input[2] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ input[3] = false
+ input[4] = 'illumina'
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_final") },
+ { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_out") },
+ { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_progress") },
+ { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - starfusion - bam") },
+ { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - starfusion - bam_sorted") },
+ { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - starfusion - bam_transcript") },
+ { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - starfusion - bam_unsorted") },
+ { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - starfusion - bedgraph") },
+ { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - starfusion - fastq") },
+ { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - starfusion - junction") },
+ { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - starfusion - read_per_gene_tab") },
+ { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - starfusion - sam") },
+ { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - starfusion - spl_junc_tab") },
+ { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - starfusion - tab") },
+ { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - starfusion - wig") },
+ { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - starfusion - versions") }
+ )
+ }
+ }
+
+ test("homo_sapiens - paired_end - multiple") {
+ config "./nextflow.config"
+
+ setup {
+ run("STAR_GENOMEGENERATE") {
+ script "../../../star/genomegenerate/main.nf"
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true),
+ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
+ ]
+ ])
+ input[1] = STAR_GENOMEGENERATE.out.index
+ input[2] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ input[3] = false
+ input[4] = 'illumina'
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - multiple - log_final") },
+ { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - multiple - log_out") },
+ { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - multiple - log_progress") },
+ { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - multiple - bam") },
+ { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - multiple - bam_sorted") },
+ { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - multiple - bam_transcript") },
+ { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - multiple - bam_unsorted") },
+ { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - multiple - bedgraph") },
+ { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - multiple - fastq") },
+ { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - multiple - junction") },
+ { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - multiple - read_per_gene_tab") },
+ { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - multiple - sam") },
+ { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - multiple - spl_junc_tab") },
+ { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - multiple - tab") },
+ { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - multiple - wig") },
+ { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - multiple - versions") }
+ )
+ }
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap
new file mode 100644
index 0000000..08edb91
--- /dev/null
+++ b/modules/nf-core/star/align/tests/main.nf.test.snap
@@ -0,0 +1,769 @@
+{
+ "homo_sapiens - paired_end - multiple - bam_sorted": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T18:01:19.968225733"
+ },
+ "homo_sapiens - paired_end - multiple - wig": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:29:01.857804"
+ },
+ "homo_sapiens - paired_end - arriba - tab": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:56:12.347549723"
+ },
+ "homo_sapiens - single_end - wig": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:22:55.24701"
+ },
+ "homo_sapiens - paired_end - sam": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:23:33.383818"
+ },
+ "homo_sapiens - paired_end - arriba - versions": {
+ "content": [
+ [
+ "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+ ]
+ ],
+ "timestamp": "2023-12-04T17:56:12.431212643"
+ },
+ "homo_sapiens - paired_end - multiple - bedgraph": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a",
+ "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6"
+ ]
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T18:01:20.07119229"
+ },
+ "homo_sapiens - paired_end - read_per_gene_tab": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:23:33.368841"
+ },
+ "homo_sapiens - paired_end - arriba - bedgraph": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:25:07.102537"
+ },
+ "homo_sapiens - single_end - junction": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:22:55.185369"
+ },
+ "homo_sapiens - paired_end - arriba - spl_junc_tab": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:56:12.268388251"
+ },
+ "homo_sapiens - single_end - sam": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:22:55.216183"
+ },
+ "homo_sapiens - paired_end - fastq": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:23:33.327236"
+ },
+ "homo_sapiens - single_end - versions": {
+ "content": [
+ [
+ "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+ ]
+ ],
+ "timestamp": "2023-12-04T17:53:26.664210196"
+ },
+ "homo_sapiens - paired_end - multiple - log_out": {
+ "content": [
+ "test.Log.out"
+ ],
+ "timestamp": "2023-11-23T13:29:01.022176"
+ },
+ "homo_sapiens - paired_end - arriba - fastq": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:25:07.15277"
+ },
+ "homo_sapiens - paired_end - multiple - junction": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:29:01.52923"
+ },
+ "homo_sapiens - paired_end - multiple - spl_junc_tab": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T18:01:20.189486201"
+ },
+ "homo_sapiens - paired_end - starfusion - log_final": {
+ "content": [
+ "test.Log.final.out"
+ ],
+ "timestamp": "2023-11-23T13:27:55.905883"
+ },
+ "homo_sapiens - paired_end - starfusion - fastq": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:27:56.192302"
+ },
+ "homo_sapiens - paired_end - multiple - sam": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:29:01.661837"
+ },
+ "homo_sapiens - paired_end - multiple - log_final": {
+ "content": [
+ "test.Log.final.out"
+ ],
+ "timestamp": "2023-11-23T13:29:00.966417"
+ },
+ "homo_sapiens - paired_end - starfusion - bam": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.Aligned.out.bam:md5,bcad07b838f6762fc01eea52b5cd3f84"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:59:58.53235164"
+ },
+ "homo_sapiens - paired_end - arriba - junction": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:25:07.202776"
+ },
+ "homo_sapiens - single_end - bedgraph": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ [
+ "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a",
+ "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f"
+ ]
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:53:26.394863748"
+ },
+ "homo_sapiens - paired_end - arriba - read_per_gene_tab": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:25:07.251962"
+ },
+ "homo_sapiens - paired_end - starfusion - bam_sorted": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:27:56.040843"
+ },
+ "homo_sapiens - single_end - bam_unsorted": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:22:55.154172"
+ },
+ "homo_sapiens - paired_end - bam": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:54:11.934832258"
+ },
+ "homo_sapiens - paired_end - arriba - bam_transcript": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:25:06.998817"
+ },
+ "homo_sapiens - paired_end - log_out": {
+ "content": [
+ "test.Log.out"
+ ],
+ "timestamp": "2023-11-23T13:23:33.259699"
+ },
+ "homo_sapiens - paired_end - arriba - log_out": {
+ "content": [
+ "test.Log.out"
+ ],
+ "timestamp": "2023-11-23T13:25:06.849451"
+ },
+ "homo_sapiens - paired_end - multiple - versions": {
+ "content": [
+ [
+ "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+ ]
+ ],
+ "timestamp": "2023-12-04T18:01:20.393705142"
+ },
+ "homo_sapiens - paired_end - starfusion - bam_transcript": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:27:56.082408"
+ },
+ "homo_sapiens - paired_end - starfusion - tab": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:59:58.818041322"
+ },
+ "homo_sapiens - single_end - fastq": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:22:55.175307"
+ },
+ "homo_sapiens - paired_end - tab": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:54:12.255481058"
+ },
+ "homo_sapiens - paired_end - starfusion - bedgraph": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:27:56.155413"
+ },
+ "homo_sapiens - single_end - bam_transcript": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:22:55.144852"
+ },
+ "homo_sapiens - paired_end - versions": {
+ "content": [
+ [
+ "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+ ]
+ ],
+ "timestamp": "2023-12-04T17:54:12.343840482"
+ },
+ "homo_sapiens - paired_end - multiple - tab": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T18:01:20.291692062"
+ },
+ "homo_sapiens - single_end - bam": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:53:26.265642675"
+ },
+ "homo_sapiens - paired_end - arriba - wig": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:25:07.444214"
+ },
+ "homo_sapiens - paired_end - log_progress": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:54:12.126063825"
+ },
+ "homo_sapiens - paired_end - arriba - log_final": {
+ "content": [
+ "test.Log.final.out"
+ ],
+ "timestamp": "2023-11-23T13:25:06.829799"
+ },
+ "homo_sapiens - paired_end - bam_unsorted": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:23:33.300509"
+ },
+ "homo_sapiens - paired_end - arriba - sam": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:25:07.300383"
+ },
+ "homo_sapiens - paired_end - multiple - bam": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T18:01:19.851247126"
+ },
+ "homo_sapiens - paired_end - multiple - fastq": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:29:01.462257"
+ },
+ "homo_sapiens - single_end - bam_sorted": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:53:26.335457371"
+ },
+ "homo_sapiens - paired_end - arriba - bam_sorted": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:25:06.94699"
+ },
+ "homo_sapiens - paired_end - starfusion - junction": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.Chimeric.out.junction:md5,c10ef219f4a30e83711b995bc5e40dba"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:59:58.641115828"
+ },
+ "homo_sapiens - single_end - tab": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:53:26.580593434"
+ },
+ "homo_sapiens - paired_end - starfusion - versions": {
+ "content": [
+ [
+ "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+ ]
+ ],
+ "timestamp": "2023-12-04T17:59:58.907317103"
+ },
+ "homo_sapiens - paired_end - multiple - bam_unsorted": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:29:01.330463"
+ },
+ "homo_sapiens - paired_end - arriba - log_progress": {
+ "content": [
+ "test.Log.progress.out"
+ ],
+ "timestamp": "2023-11-23T13:25:06.86866"
+ },
+ "homo_sapiens - paired_end - bedgraph": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a",
+ "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6"
+ ]
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:54:12.064121304"
+ },
+ "homo_sapiens - paired_end - starfusion - bam_unsorted": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:27:56.118974"
+ },
+ "homo_sapiens - paired_end - starfusion - read_per_gene_tab": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:27:56.264699"
+ },
+ "homo_sapiens - paired_end - multiple - log_progress": {
+ "content": [
+ "test.Log.progress.out"
+ ],
+ "timestamp": "2023-11-23T13:29:01.076947"
+ },
+ "homo_sapiens - paired_end - arriba - bam_unsorted": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:25:07.050409"
+ },
+ "homo_sapiens - paired_end - bam_sorted": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:54:12.002180537"
+ },
+ "homo_sapiens - single_end - spl_junc_tab": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:53:26.50932751"
+ },
+ "homo_sapiens - paired_end - starfusion - spl_junc_tab": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:59:58.731699486"
+ },
+ "homo_sapiens - single_end - log_out": {
+ "content": [
+ "test.Log.out"
+ ],
+ "timestamp": "2023-11-23T13:22:55.126286"
+ },
+ "homo_sapiens - paired_end - log_final": {
+ "content": [
+ "test.Log.final.out"
+ ],
+ "timestamp": "2023-11-23T13:23:33.253884"
+ },
+ "homo_sapiens - single_end - log_final": {
+ "content": [
+ "test.Log.final.out"
+ ],
+ "timestamp": "2023-11-23T13:22:55.11799"
+ },
+ "homo_sapiens - paired_end - bam_transcript": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:23:33.287684"
+ },
+ "homo_sapiens - paired_end - starfusion - log_progress": {
+ "content": [
+ "test.Log.progress.out"
+ ],
+ "timestamp": "2023-11-23T13:27:55.971484"
+ },
+ "homo_sapiens - paired_end - multiple - bam_transcript": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:29:01.264176"
+ },
+ "homo_sapiens - paired_end - multiple - read_per_gene_tab": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:29:01.596406"
+ },
+ "homo_sapiens - single_end - read_per_gene_tab": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:22:55.205936"
+ },
+ "homo_sapiens - paired_end - junction": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:23:33.340653"
+ },
+ "homo_sapiens - paired_end - spl_junc_tab": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:54:12.185730856"
+ },
+ "homo_sapiens - paired_end - starfusion - sam": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:27:56.300637"
+ },
+ "homo_sapiens - paired_end - arriba - bam": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.Aligned.out.bam:md5,c1b1747f5873f2d17762725636e891d5"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:56:12.190560178"
+ },
+ "homo_sapiens - single_end - log_progress": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T17:53:26.450352138"
+ },
+ "homo_sapiens - paired_end - starfusion - wig": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:27:56.422018"
+ },
+ "homo_sapiens - paired_end - wig": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-23T13:23:33.429457"
+ },
+ "homo_sapiens - paired_end - starfusion - log_out": {
+ "content": [
+ "test.Log.out"
+ ],
+ "timestamp": "2023-11-23T13:27:55.93945"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/align/tests/nextflow.arriba.config b/modules/nf-core/star/align/tests/nextflow.arriba.config
new file mode 100644
index 0000000..2324b9e
--- /dev/null
+++ b/modules/nf-core/star/align/tests/nextflow.arriba.config
@@ -0,0 +1,14 @@
+process {
+
+ withName: STAR_GENOMEGENERATE {
+ ext.args = '--genomeSAindexNbases 9'
+ }
+
+ withName: STAR_ALIGN {
+ ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50'
+ }
+
+}
+
+// Fix chown issue for the output star folder
+docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)'
diff --git a/modules/nf-core/star/align/tests/nextflow.config b/modules/nf-core/star/align/tests/nextflow.config
new file mode 100644
index 0000000..c4ac580
--- /dev/null
+++ b/modules/nf-core/star/align/tests/nextflow.config
@@ -0,0 +1,14 @@
+process {
+
+ withName: STAR_GENOMEGENERATE {
+ ext.args = '--genomeSAindexNbases 9'
+ }
+
+ withName: STAR_ALIGN {
+ ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --outWigType bedGraph --outWigStrand Unstranded'
+ }
+
+}
+
+// Fix chown issue for the output star folder
+docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)'
diff --git a/modules/nf-core/star/align/tests/nextflow.starfusion.config b/modules/nf-core/star/align/tests/nextflow.starfusion.config
new file mode 100644
index 0000000..467b649
--- /dev/null
+++ b/modules/nf-core/star/align/tests/nextflow.starfusion.config
@@ -0,0 +1,14 @@
+process {
+
+ withName: STAR_GENOMEGENERATE {
+ ext.args = '--genomeSAindexNbases 9'
+ }
+
+ withName: STAR_ALIGN {
+ ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outReadsUnmapped None --twopassMode Basic --outSAMstrandField intronMotif --outSAMunmapped Within --chimSegmentMin 12 --chimJunctionOverhangMin 8 --chimOutJunctionFormat 1 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --alignSJstitchMismatchNmax 5 -1 5 5 --chimMultimapScoreRange 3 --chimScoreJunctionNonGTAG -4 --chimMultimapNmax 20 --chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --alignInsertionFlush Right --alignSplicedMateMapLminOverLmate 0 --alignSplicedMateMapLmin 30'
+ }
+
+}
+
+// Fix chown issue for the output star folder
+docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)'
diff --git a/modules/nf-core/star/align/tests/tags.yml b/modules/nf-core/star/align/tests/tags.yml
new file mode 100644
index 0000000..8beace1
--- /dev/null
+++ b/modules/nf-core/star/align/tests/tags.yml
@@ -0,0 +1,2 @@
+star/align:
+ - modules/nf-core/star/align/**
diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml
new file mode 100644
index 0000000..93e4476
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/environment.yml
@@ -0,0 +1,11 @@
+name: star_genomegenerate
+
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+
+dependencies:
+ - bioconda::samtools=1.18
+ - bioconda::star=2.7.10a
+ - conda-forge::gawk=5.1.0
diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf
index ed32d7c..b885571 100644
--- a/modules/nf-core/star/genomegenerate/main.nf
+++ b/modules/nf-core/star/genomegenerate/main.nf
@@ -2,15 +2,14 @@ process STAR_GENOMEGENERATE {
tag "$fasta"
label 'process_high'
- conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' :
- 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }"
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' :
+ 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }"
input:
tuple val(meta), path(fasta)
tuple val(meta2), path(gtf)
- val star_ignore_sjdbgtf
output:
tuple val(meta), path("star") , emit: index
@@ -20,10 +19,10 @@ process STAR_GENOMEGENERATE {
task.ext.when == null || task.ext.when
script:
- def args = task.ext.args ?: ''
- def args_list = args.tokenize()
- def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
- def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf"
+ def args = task.ext.args ?: ''
+ def args_list = args.tokenize()
+ def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
+ def include_gtf = gtf ? "--sjdbGTFfile $gtf" : ''
if (args_list.contains('--genomeSAindexNbases')) {
"""
mkdir star
@@ -31,7 +30,7 @@ process STAR_GENOMEGENERATE {
--runMode genomeGenerate \\
--genomeDir star/ \\
--genomeFastaFiles $fasta \\
- $ignore_gtf \\
+ $include_gtf \\
--runThreadN $task.cpus \\
$memory \\
$args
@@ -53,7 +52,7 @@ process STAR_GENOMEGENERATE {
--runMode genomeGenerate \\
--genomeDir star/ \\
--genomeFastaFiles $fasta \\
- $ignore_gtf \\
+ $include_gtf \\
--runThreadN $task.cpus \\
--genomeSAindexNbases \$NUM_BASES \\
$memory \\
@@ -69,30 +68,52 @@ process STAR_GENOMEGENERATE {
}
stub:
- """
- mkdir star
- touch star/Genome
- touch star/Log.out
- touch star/SA
- touch star/SAindex
- touch star/chrLength.txt
- touch star/chrName.txt
- touch star/chrNameLength.txt
- touch star/chrStart.txt
- touch star/exonGeTrInfo.tab
- touch star/exonInfo.tab
- touch star/geneInfo.tab
- touch star/genomeParameters.txt
- touch star/sjdbInfo.txt
- touch star/sjdbList.fromGTF.out.tab
- touch star/sjdbList.out.tab
- touch star/transcriptInfo.tab
+ if (gtf) {
+ """
+ mkdir star
+ touch star/Genome
+ touch star/Log.out
+ touch star/SA
+ touch star/SAindex
+ touch star/chrLength.txt
+ touch star/chrName.txt
+ touch star/chrNameLength.txt
+ touch star/chrStart.txt
+ touch star/exonGeTrInfo.tab
+ touch star/exonInfo.tab
+ touch star/geneInfo.tab
+ touch star/genomeParameters.txt
+ touch star/sjdbInfo.txt
+ touch star/sjdbList.fromGTF.out.tab
+ touch star/sjdbList.out.tab
+ touch star/transcriptInfo.tab
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- star: \$(STAR --version | sed -e "s/STAR_//g")
- samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
- gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
- END_VERSIONS
- """
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ star: \$(STAR --version | sed -e "s/STAR_//g")
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+ END_VERSIONS
+ """
+ } else {
+ """
+ mkdir star
+ touch star/Genome
+ touch star/Log.out
+ touch star/SA
+ touch star/SAindex
+ touch star/chrLength.txt
+ touch star/chrName.txt
+ touch star/chrNameLength.txt
+ touch star/chrStart.txt
+ touch star/genomeParameters.txt
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ star: \$(STAR --version | sed -e "s/STAR_//g")
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+ END_VERSIONS
+ """
+ }
}
diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml
index e824dbf..1061e1b 100644
--- a/modules/nf-core/star/genomegenerate/meta.yml
+++ b/modules/nf-core/star/genomegenerate/meta.yml
@@ -31,7 +31,6 @@ input:
- gtf:
type: file
description: GTF file of the reference genome
-
output:
- meta:
type: map
@@ -46,8 +45,9 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
-
authors:
- "@kevinmenden"
- "@drpatelh"
- - "@gallvp"
+maintainers:
+ - "@kevinmenden"
+ - "@drpatelh"
diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test
new file mode 100644
index 0000000..af0c942
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test
@@ -0,0 +1,117 @@
+nextflow_process {
+
+ name "Test Process STAR_GENOMEGENERATE"
+ script "../main.nf"
+ process "STAR_GENOMEGENERATE"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "star"
+ tag "star/genomegenerate"
+
+ test("homo_sapiens") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("homo_sapiens-stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([
+ [ id:'test_gtf' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("homo_sapiens-without_gtf") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([ [], [] ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("homo_sapiens-without_gtf-stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ ])
+ input[1] = Channel.of([ [], [] ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
new file mode 100644
index 0000000..9de08c7
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
@@ -0,0 +1,22 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+ ]
+ ],
+ "timestamp": "2023-12-19T11:05:51.741109"
+ },
+ "index_with_gtf": {
+ "content": [
+ "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]"
+ ],
+ "timestamp": "2023-12-19T11:38:14.551548"
+ },
+ "index_without_gtf": {
+ "content": [
+ "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]"
+ ],
+ "timestamp": "2023-12-19T11:38:22.382905"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml
new file mode 100644
index 0000000..79f619b
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/tests/tags.yml
@@ -0,0 +1,2 @@
+star/genomegenerate:
+ - modules/nf-core/star/genomegenerate/**
diff --git a/modules/nf-core/star/starsolo/main.nf b/modules/nf-core/star/starsolo/main.nf
deleted file mode 100644
index 07499b6..0000000
--- a/modules/nf-core/star/starsolo/main.nf
+++ /dev/null
@@ -1,94 +0,0 @@
-process STARSOLO {
- tag "$meta.id"
- label 'process_high'
-
- conda "bioconda::star=2.7.10b"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/star:2.7.10b--h9ee0642_0':
- 'biocontainers/star:2.7.10b--h9ee0642_0' }"
-
- input:
- tuple val(meta), val(solotype), path(reads)
- tuple val(meta2), path(index)
-
- output:
- tuple val(meta), path('*.Solo.out') , emit: counts
- tuple val(meta), path('*Log.final.out') , emit: log_final
- tuple val(meta), path('*Log.out') , emit: log_out
- tuple val(meta), path('*Log.progress.out') , emit: log_progress
- tuple val(meta), path('*/Gene/Summary.csv') , emit: summary
- path "versions.yml" , emit: versions
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
- def (forward, reverse) = reads.collate(2).transpose()
- def zcat = reads[0].getExtension() == "gz" ? "--readFilesCommand zcat": ""
-
- // Handle solotype argument logic
- switch(solotype) {
- case "CB_UMI_Simple":
- solotype_args = meta.umi_len ? "--soloUMIlen ${meta.umi_len} " : "";
- solotype_args = solotype_args + (meta.whitelist ? "--soloCBwhitelist ${meta.whitelist} " : "--soloCBwhitelist None ");
- solotype_args = solotype_args + (meta.umi_start ? "--soloUMIstart ${meta.umi_start} " : "");
- solotype_args = solotype_args + (meta.cb_len ? "--soloCBlen ${meta.cb_len} " : "");
- solotype_args = solotype_args + (meta.cb_start ? "--soloCBstart ${meta.cb_start} " : "");
- solotype_args = solotype_args + (meta.barcode_len ? "--soloBarcodeReadLength ${meta.barcode_len} " : "");
- solotype_args = solotype_args + (meta.barcode_mate ? "--soloBarcodeMate ${meta.barcode_mate} " : "");
- break
- case "CB_UMI_Complex":
- solotype_args = meta.cb_position ? "--soloCBposition ${meta.cb_position}" : "";
- solotype_args = solotype_args + (meta.whitelist ? "--soloCBwhitelist ${meta.whitelist} " : "--soloCBwhitelist None ");
- solotype_args = solotype_args + (meta.umi_position ? "--soloUMIposition ${meta.umi_position} " : "");
- solotype_args = solotype_args + (meta.adapter_seq ? "--soloAdapterSequence ${meta.adapter_seq} " : "");
- solotype_args = solotype_args + (meta.max_mismatch_adapter ? "--soloAdapterMismatchesNmax ${meta.max_mismatch_adapter} " : "");
- break
- case "SmartSeq":
- solotype_args = "--soloUMIdedup Exact ";
- solotype_args = solotype_args + (meta.strandedness ? "--soloStrand ${meta.strandedness} " : "");
- solotype_args = solotype_args + "--outSAMattrRGline ID:${prefix} ";
- break
- default:
- log.warn("Unknown output solotype (${solotype})");
- break
- }
-
- """
- STAR \\
- --genomeDir $index \\
- --readFilesIn ${reverse.join( "," )} ${forward.join( "," )} \\
- --runThreadN $task.cpus \\
- --outFileNamePrefix $prefix. \\
- --soloType $solotype \\
- $zcat \\
- $solotype_args \\
- $args
-
- if [ -d ${prefix}.Solo.out ]; then
- find ${prefix}.Solo.out \\( -name "*.tsv" -o -name "*.mtx" \\) -exec gzip {} \\;
- fi
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- star: \$(STAR --version | sed -e "s/STAR_//g")
- END_VERSIONS
- """
-
- stub:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
- """
- mkdir ${prefix}.Solo.out/
- touch ${prefix}.Solo.out/Log.final.out
- touch ${prefix}.Solo.out/Log.out
- touch ${prefix}.Solo.out/Log.progress.out
- touch ${prefix}.Solo.out/Summary.csv
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- star: \$(STAR --version | sed -e "s/STAR_//g")
- END_VERSIONS
- """
-}
diff --git a/modules/nf-core/star/starsolo/meta.yml b/modules/nf-core/star/starsolo/meta.yml
deleted file mode 100644
index 4fce56c..0000000
--- a/modules/nf-core/star/starsolo/meta.yml
+++ /dev/null
@@ -1,79 +0,0 @@
-name: "starsolo"
-description: Create a counts matrix for single-cell data using STARSolo, handling cell barcodes and UMI information.
-keywords:
- - align
- - count
- - genome
- - reference
-tools:
- - "starsolo":
- description: "Mapping, demultiplexing and quantification for single cell RNA-seq."
- homepage: "https://github.com/alexdobin/STAR/"
- documentation: "https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md"
- doi: "10.1101/2021.05.05.442755"
- licence: ["MIT"]
-input:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information.
- Here, you should add all the specific barcode/umi
- information for each sample.
- e.g. `[ id:'test_starsolo', umi_len:'12', cb_start:1 ]`
- - solotype:
- type: string
- description: |
- Type of single-cell library.
- It can be CB_UMI_Simple for most common ones such as 10xv2 and 10xv3,
- CB_UMI_Complex for method such as inDrop and SmartSeq for SMART-Seq.
- - meta2:
- type: map
- description: Groovy Map containing the STAR index information.
- - index:
- type: directory
- description: STAR genome index
- pattern: "star"
- - reads:
- type: file
- description: |
- List of input FastQ files of size 1 and 2 for single-end and paired-end data,
- respectively.
-output:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information.
- Here, you should add all the specific barcode/umi
- information for each sample.
- e.g. `[ id:'test_starsolo', umi_len:'12', cb_start:1 ]`
- - log_final:
- type: file
- description: STAR final log file
- pattern: "*Log.final.out"
- - log_out:
- type: file
- description: STAR lot out file
- pattern: "*Log.out"
- - log_progress:
- type: file
- description: STAR log progress file
- pattern: "*Log.progress.out"
- - summary:
- type: file
- description: STARSolo metrics summary CSV file.
- pattern: "*/Gene/Summary.csv"
- - versions:
- type: file
- description: File containing software versions
- pattern: "versions.yml"
-
-authors:
- - "@kevinmenden"
- - "@ggabernet"
- - "@grst"
- - "@fmalmeida"
- - "@rhreynolds"
- - "@apeltzer"
- - "@vivian-chen16"
- - "@maxulysse"
- - "@joaodemeirelles"
diff --git a/modules/nf-core/trinity/main.nf b/modules/nf-core/trinity/main.nf
deleted file mode 100644
index 3960a35..0000000
--- a/modules/nf-core/trinity/main.nf
+++ /dev/null
@@ -1,74 +0,0 @@
-process TRINITY {
- tag "$meta.id"
- label 'process_high_memory'
-
- conda "bioconda::trinity=2.13.2"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/trinity:2.13.2--h00214ad_1':
- 'biocontainers/trinity:2.13.2--h00214ad_1' }"
-
- input:
- tuple val(meta), path(reads)
-
- output:
- tuple val(meta), path("*.fa.gz") , emit: transcript_fasta
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
-
- if (meta.single_end) {
- reads_args = "--single ${reads}"
- } else {
- reads_args = "--left ${reads[0]} --right ${reads[1]}"
- }
-
- // --seqType argument, fasta or fastq. Exact pattern match .fasta or .fa suffix with optional .gz (gzip) suffix
- seqType_args = reads[0] ==~ /(.*fasta(.gz)?$)|(.*fa(.gz)?$)/ ? "fa" : "fq"
-
- // Define the memory requirements. Trinity needs this as an option.
- def avail_mem = 7
- if (!task.memory) {
- log.info '[Trinity] Available memory not known - defaulting to 7GB. Specify process memory requirements to change this.'
- } else {
- avail_mem = (task.memory.giga*0.8).intValue()
- }
-
- """
- # Note that Trinity needs the word 'trinity' in the outdir
-
- Trinity \\
- --seqType ${seqType_args} \\
- --max_memory ${avail_mem}G \\
- ${reads_args} \\
- --output ${prefix}_trinity \\
- --CPU $task.cpus \\
- $args
-
- gzip -cf ${prefix}_trinity.Trinity.fasta > ${prefix}.fa.gz
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- trinity: \$(echo \$(Trinity --version | head -n 1 2>&1) | sed 's/^Trinity version: Trinity-v//' ))
- END_VERSIONS
-
- # Need to only take the first line of --version since it will warn about not being up-to-date and this messes up the version.yaml.
- """
-
- stub:
- def prefix = task.ext.prefix ?: "${meta.id}"
- """
- touch ${prefix}.fa.gz
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- trinity: \$(echo \$(Trinity --version | head -n 1 2>&1) | sed 's/^Trinity version: Trinity-v//' ))
- END_VERSIONS
-
- # Need to only take the first line of --version since it will warn about not being up-to-date and this messes up the version.yaml.
- """
-}
diff --git a/modules/nf-core/trinity/meta.yml b/modules/nf-core/trinity/meta.yml
deleted file mode 100644
index 26e8c10..0000000
--- a/modules/nf-core/trinity/meta.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-name: "trinity"
-description: Assembles a de novo transcriptome from RNAseq reads
-keywords:
- - assembly
- - de novo assembler
- - fasta
- - fastq
-tools:
- - "trinity":
- description: "Trinity assembles transcript sequences from Illumina RNA-Seq data."
- homepage: "https://github.com/trinityrnaseq/trinityrnaseq/wiki"
- documentation: "https://github.com/trinityrnaseq/trinityrnaseq/wiki"
- tool_dev_url: "https://github.com/trinityrnaseq/trinityrnaseq/"
- doi: "10.1038/nbt.1883"
- licence: "['BSD-3-clause']"
-
-input:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [ id:'test', single_end:false ]
- - reads:
- type: file
- description: fasta/fastq file of reads to be assembled into a transcriptome
- pattern: "*.{fa|fasta|fq|fastq}"
-
-output:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [ id:'test', single_end:false ]
- - versions:
- type: file
- description: File containing software versions
- pattern: "versions.yml"
- - transcript_fasta:
- type: file
- description: de novo assembled transcripts fasta file compressed
- pattern: "*.fa.gz"
-
-authors:
- - "@timslittle"
- - "@gallvp"
diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf
deleted file mode 100644
index 56ea046..0000000
--- a/modules/nf-core/umitools/dedup/main.nf
+++ /dev/null
@@ -1,62 +0,0 @@
-process UMITOOLS_DEDUP {
- tag "$meta.id"
- label "process_medium"
-
- conda "bioconda::umi_tools=1.1.4"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
- 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"
-
- input:
- tuple val(meta), path(bam), path(bai)
- val get_output_stats
-
- output:
- tuple val(meta), path("${prefix}.bam") , emit: bam
- tuple val(meta), path("*.log") , emit: log
- tuple val(meta), path("*edit_distance.tsv"), optional:true, emit: tsv_edit_distance
- tuple val(meta), path("*per_umi.tsv") , optional:true, emit: tsv_per_umi
- tuple val(meta), path("*per_position.tsv") , optional:true, emit: tsv_umi_per_position
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- prefix = task.ext.prefix ?: "${meta.id}"
- def paired = meta.single_end ? "" : "--paired"
- stats = get_output_stats ? "--output-stats ${prefix}" : ""
- if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
-
- if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"}
- """
- PYTHONHASHSEED=0 umi_tools \\
- dedup \\
- -I $bam \\
- -S ${prefix}.bam \\
- -L ${prefix}.log \\
- $stats \\
- $paired \\
- $args
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
- END_VERSIONS
- """
-
- stub:
- """
- touch ${prefix}.bam
- touch ${prefix}.log
- touch ${prefix}_edit_distance.tsv
- touch ${prefix}_per_umi.tsv
- touch ${prefix}_per_position.tsv
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
- END_VERSIONS
- """
-}
diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml
deleted file mode 100644
index 534d4c6..0000000
--- a/modules/nf-core/umitools/dedup/meta.yml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: umitools_dedup
-description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
-keywords:
- - umitools
- - deduplication
- - dedup
-tools:
- - umi_tools:
- description: >
- UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
- and single cell RNA-Seq cell barcodes
- documentation: https://umi-tools.readthedocs.io/en/latest/
- license: ["MIT"]
-input:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [ id:'test', single_end:false ]
- - bam:
- type: file
- description: |
- BAM file containing reads to be deduplicated via UMIs.
- pattern: "*.{bam}"
- - bai:
- type: file
- description: |
- BAM index files corresponding to the input BAM file.
- pattern: "*.{bai}"
- - get_output_stats:
- type: boolean
- description: |
- Whether or not to generate output stats.
-output:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [ id:'test', single_end:false ]
- - bam:
- type: file
- description: BAM file with deduplicated UMIs.
- pattern: "*.{bam}"
- - log:
- type: file
- description: File with logging information
- pattern: "*.{log}"
- - tsv_edit_distance:
- type: file
- description: Reports the (binned) average edit distance between the UMIs at each position.
- pattern: "*edit_distance.tsv"
- - tsv_per_umi:
- type: file
- description: UMI-level summary statistics.
- pattern: "*per_umi.tsv"
- - tsv_umi_per_position:
- type: file
- description: Tabulates the counts for unique combinations of UMI and position.
- pattern: "*per_position.tsv"
- - versions:
- type: file
- description: File containing software versions
- pattern: "versions.yml"
-
-authors:
- - "@drpatelh"
- - "@grst"
- - "@klkeys"
diff --git a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml
new file mode 100644
index 0000000..7d08ac0
--- /dev/null
+++ b/modules/nf-core/umitools/extract/environment.yml
@@ -0,0 +1,7 @@
+name: umitools_extract
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::umi_tools=1.1.4
diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf
index 2f94fa9..4bd79e7 100644
--- a/modules/nf-core/umitools/extract/main.nf
+++ b/modules/nf-core/umitools/extract/main.nf
@@ -3,7 +3,7 @@ process UMITOOLS_EXTRACT {
label "process_single"
label "process_long"
- conda "bioconda::umi_tools=1.1.4"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"
@@ -33,7 +33,7 @@ process UMITOOLS_EXTRACT {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+ umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' )
END_VERSIONS
"""
} else {
@@ -49,7 +49,7 @@ process UMITOOLS_EXTRACT {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+ umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' )
END_VERSIONS
"""
}
diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml
index db64a0f..7695b27 100644
--- a/modules/nf-core/umitools/extract/meta.yml
+++ b/modules/nf-core/umitools/extract/meta.yml
@@ -1,15 +1,16 @@
name: umitools_extract
description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place
keywords:
- - umitools
+ - UMI
+ - barcode
- extract
+ - umitools
tools:
- umi_tools:
description: >
- UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
- and single cell RNA-Seq cell barcodes
+ UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes
documentation: https://umi-tools.readthedocs.io/en/latest/
- license: ["MIT"]
+ license: "MIT"
input:
- meta:
type: map
@@ -29,9 +30,7 @@ output:
- reads:
type: file
description: >
- Extracted FASTQ files. |
- For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. |
- For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
+ Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
pattern: "*.{fastq.gz}"
- log:
type: file
@@ -41,7 +40,9 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
-
authors:
- "@drpatelh"
- "@grst"
+maintainers:
+ - "@drpatelh"
+ - "@grst"
diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test
new file mode 100644
index 0000000..22242d1
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/main.nf.test
@@ -0,0 +1,35 @@
+nextflow_process {
+
+ name "Test Process UMITOOLS_EXTRACT"
+ script "../main.nf"
+ process "UMITOOLS_EXTRACT"
+ config "./nextflow.config"
+ tag "modules_nfcore"
+ tag "modules"
+ tag "umitools"
+ tag "umitools/extract"
+
+ test("Should run without failures") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:true ], // meta map
+ [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap
new file mode 100644
index 0000000..6d5944f
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap
@@ -0,0 +1,10 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,5a18da2d3a5a4de15e7aaae9082d7abb"
+ ]
+ ],
+ "timestamp": "2023-12-08T09:41:43.540658352"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config
new file mode 100644
index 0000000..628f5fc
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/nextflow.config
@@ -0,0 +1,9 @@
+process {
+
+ publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+ withName: UMITOOLS_EXTRACT {
+ ext.args = '--bc-pattern="NNNN"'
+ }
+
+}
diff --git a/modules/nf-core/umitools/extract/tests/tags.yml b/modules/nf-core/umitools/extract/tests/tags.yml
new file mode 100644
index 0000000..c3fb23d
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/tags.yml
@@ -0,0 +1,2 @@
+umitools/extract:
+ - modules/nf-core/umitools/extract/**
diff --git a/modules/nf-core/umitools/group/main.nf b/modules/nf-core/umitools/group/main.nf
deleted file mode 100644
index 9a6370b..0000000
--- a/modules/nf-core/umitools/group/main.nf
+++ /dev/null
@@ -1,62 +0,0 @@
-process UMITOOLS_GROUP {
- tag "$meta.id"
- label 'process_medium'
-
- conda "bioconda::umi_tools=1.1.4"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
- 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"
-
- input:
- tuple val(meta), path(bam), path(bai)
- val create_bam
- val get_group_info
-
- output:
- tuple val(meta), path("*.log") , emit: log
- tuple val(meta), path("${prefix}.bam"), optional: true, emit: bam
- tuple val(meta), path("*.tsv") , optional: true, emit: tsv
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- prefix = task.ext.prefix ?: "${meta.id}"
- def paired = meta.single_end ? "" : "--paired"
- output_bam = create_bam ? "--output-bam -S ${prefix}.bam" : ""
- group_info = get_group_info ? "--group-out ${prefix}.tsv" : ""
-
- if (create_bam && "$bam" == "${prefix}.bam") { error "Input and output names are the same, set prefix in module configuration to disambiguate!" }
-
- if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"}
- """
- PYTHONHASHSEED=0 umi_tools \\
- group \\
- -I $bam \\
- $output_bam \\
- -L ${prefix}.log \\
- $group_info \\
- $paired \\
- $args
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
- END_VERSIONS
- """
-
- stub:
- prefix = task.ext.prefix ?: "${meta.id}"
- """
- touch ${prefix}.bam
- touch ${prefix}.log
- touch ${prefix}.tsv
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
- END_VERSIONS
- """
-}
diff --git a/modules/nf-core/umitools/group/meta.yml b/modules/nf-core/umitools/group/meta.yml
deleted file mode 100644
index 1fa826d..0000000
--- a/modules/nf-core/umitools/group/meta.yml
+++ /dev/null
@@ -1,62 +0,0 @@
-name: umitools_group
-description: Group reads based on their UMI and mapping coordinates
-keywords:
- - umitools
- - umi
- - deduplication
- - dedup
- - clustering
-tools:
- - umi_tools:
- description: >
- UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
- and single cell RNA-Seq cell barcodes
- documentation: https://umi-tools.readthedocs.io/en/latest/
- license: ["MIT"]
-
-input:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [ id:'test', single_end:false ]
- - bam:
- type: file
- description: |
- BAM file containing reads to be deduplicated via UMIs.
- pattern: "*.{bam}"
- - bai:
- type: file
- description: |
- BAM index files corresponding to the input BAM file.
- pattern: "*.{bai}"
- - create_bam:
- type: boolean
- description: |
- Whether or not to create a read group tagged BAM file.
- - get_group_info:
- type: boolean
- description: |
- Whether or not to generate the flatfile describing the read groups, see docs for complete info of all columns
-
-output:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [ id:'test', single_end:false ]
- - bam:
- type: file
- description: a read group tagged BAM file.
- pattern: "${prefix}.{bam}"
- - log:
- type: file
- description: File with logging information
- pattern: "*.{log}"
- - tsv:
- type: file
- description: Flatfile describing the read groups, see docs for complete info of all columns
- pattern: "*.{tsv}"
-
-authors:
- - "@Joon-Klaps"
diff --git a/modules/pfr/custom/restoregffids/environment.yml b/modules/pfr/custom/restoregffids/environment.yml
new file mode 100644
index 0000000..2450c45
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "custom_restoregffids"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "python=3.10.2"
diff --git a/modules/pfr/custom/restoregffids/main.nf b/modules/pfr/custom/restoregffids/main.nf
new file mode 100644
index 0000000..14e2c07
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/main.nf
@@ -0,0 +1,35 @@
+process CUSTOM_RESTOREGFFIDS { // Runs templates/restore_gff_ids.py on a GFF3 file and an ID-map TSV
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/python:3.10.2':
+ 'biocontainers/python:3.10.2' }"
+
+ input:
+ tuple val(meta), path(gff3)
+ path(ids_tsv) // referenced as $ids_tsv by the template
+
+ output:
+ tuple val(meta), path("*.restored.ids.gff3") , emit: restored_ids_gff3
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ prefix = task.ext.prefix ?: "${meta.id}" // NOTE(review): no `def` here, presumably so the template can resolve $prefix -- confirm
+ template 'restore_gff_ids.py'
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}" // the stub only touches the output file and writes versions.yml
+ """
+ touch "${prefix}.restored.ids.gff3"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ python: \$(python --version | cut -d' ' -f2)
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/custom/restoregffids/meta.yml b/modules/pfr/custom/restoregffids/meta.yml
new file mode 100644
index 0000000..4e42b82
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/meta.yml
@@ -0,0 +1,58 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "custom_restoregffids"
+description: |
+ Restores IDs in a gff3 file based on a TSV table
+ consisting of original (first column) and new IDs (second column).
+ This module is helpful when some tools like EDTA implicitly shorten
+ the IDs without producing the ID map, leading to downstream mismatch
+ in IDs across files.
+keywords:
+ - genome
+ - gff
+ - ID
+ - shorten
+ - restore
+tools:
+ - "python":
+ description: |
+ Python is a programming language that lets you work quickly
+ and integrate systems more effectively
+ homepage: "https://www.python.org"
+ documentation: "https://docs.python.org/3/"
+ tool_dev_url: "https://github.com/python/cpython"
+ licence: ["PSF-2.0"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test' ]`
+ - gff3:
+ type: file
+ description: Input gff3 file
+ pattern: "*.{gff,gff3}"
+ - ids_tsv:
+ type: file
+ description: |
+ A TSV file with original (first column) and new ids (second column)
+ if id change was required
+ pattern: "*.tsv"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test' ]`
+ - restored_ids_gff3:
+ type: file
+ description: GFF3 file with restored ids
+ pattern: "*.restored.ids.gff3"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py b/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py
new file mode 100755
index 0000000..d0699de
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+
+from platform import python_version
+
+ids_tsv = "$ids_tsv"
+input_gff3 = "$gff3"
+output_prefix = "$prefix"
+
+
+def create_name_mapping_from_tsv(file_path):
+    # Returns a dict of new ID -> original ID; TSV columns are (original, new)
+    new_to_orig = {}
+    with open(file_path, "r") as tsv_file:
+        for line in tsv_file:
+            if not line.strip():
+                continue  # tolerate blank lines such as a trailing newline
+            columns = line.strip().split("\\t")
+            if len(columns) != 2:
+                raise ValueError(f"{file_path} should be a two column TSV file")
+            orig_id, new_id = columns
+            new_to_orig[new_id] = orig_id
+    return new_to_orig
+
+
+def restore_gff3_ids(new_to_orig_ids, file_path, output_file_name):
+    # Write versions.yml (process name and Python version)
+    with open("versions.yml", "w") as f_versions:
+        f_versions.write('"${task.process}":\\n')
+        f_versions.write(f" python: {python_version()}\\n")
+
+    with open(file_path, "r") as input_gff3_file:
+        input_lines = input_gff3_file.readlines()
+
+    with open(output_file_name, "w") as output_gff_file:
+        for line in input_lines:
+            # Directives ("##"), comments ("#") and blank lines have no ID column
+            if line.startswith("#") or not line.strip():
+                output_gff_file.write(line)
+                continue
+            new_id, *other_columns = line.split("\\t")
+            orig_id = new_to_orig_ids[new_id]  # KeyError if the ID is not in the map
+            output_gff_file.write("\\t".join([orig_id] + other_columns))
+
+
+if __name__ == "__main__":
+ new_to_orig_ids = create_name_mapping_from_tsv(ids_tsv)
+ restore_gff3_ids(new_to_orig_ids, input_gff3, f"{output_prefix}.restored.ids.gff3")
diff --git a/modules/pfr/custom/restoregffids/tests/main.nf.test b/modules/pfr/custom/restoregffids/tests/main.nf.test
new file mode 100644
index 0000000..521b924
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/tests/main.nf.test
@@ -0,0 +1,63 @@
+nextflow_process {
+
+ name "Test Process CUSTOM_RESTOREGFFIDS"
+ script "../main.nf"
+ process "CUSTOM_RESTOREGFFIDS"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "custom"
+ tag "custom/restoregffids"
+
+ test("sarscov2-genome_gff3-success") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ ]
+ input[1] = Channel.of('Chr1\tMT192765.1').collectFile(name: 'id_map.tsv', newLine: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert path(process.out.restored_ids_gff3.get(0).get(1)).getText().contains("Chr1") },
+ { assert !path(process.out.restored_ids_gff3.get(0).get(1)).getText().contains("MT192765.1") },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ ]
+ input[1] = Channel.of('Chr1\tMT192765.1').collectFile(name: 'id_map.tsv', newLine: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.restored_ids_gff3 != null },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/custom/restoregffids/tests/main.nf.test.snap b/modules/pfr/custom/restoregffids/tests/main.nf.test.snap
new file mode 100644
index 0000000..ffe43e7
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/tests/main.nf.test.snap
@@ -0,0 +1,41 @@
+{
+ "sarscov2-genome_gff3-success": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.restored.ids.gff3:md5,2c294938b9eb4e52d19e14725c1d92a9"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e"
+ ],
+ "restored_ids_gff3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.restored.ids.gff3:md5,2c294938b9eb4e52d19e14725c1d92a9"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e"
+ ]
+ }
+ ],
+ "timestamp": "2023-12-07T13:49:30.047425"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e"
+ ]
+ ],
+ "timestamp": "2023-12-07T13:49:30.071175"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/custom/restoregffids/tests/tags.yml b/modules/pfr/custom/restoregffids/tests/tags.yml
new file mode 100644
index 0000000..1d4b9a8
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/tests/tags.yml
@@ -0,0 +1,2 @@
+custom/restoregffids:
+ - "modules/pfr/custom/restoregffids/**"
diff --git a/modules/pfr/custom/shortenfastaids/environment.yml b/modules/pfr/custom/shortenfastaids/environment.yml
new file mode 100644
index 0000000..e80fa7c
--- /dev/null
+++ b/modules/pfr/custom/shortenfastaids/environment.yml
@@ -0,0 +1,11 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "custom_shortenfastaids"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+
+dependencies:
+ - biopython==1.75
+ - python=3.8
diff --git a/modules/pfr/custom/shortenfastaids/main.nf b/modules/pfr/custom/shortenfastaids/main.nf
new file mode 100644
index 0000000..92762ef
--- /dev/null
+++ b/modules/pfr/custom/shortenfastaids/main.nf
@@ -0,0 +1,34 @@
+process CUSTOM_SHORTENFASTAIDS { // Runs templates/shorten_fasta_ids.py on a FASTA file
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/biopython:1.75':
+ 'biocontainers/biopython:1.75' }"
+
+ input:
+ tuple val(meta), path(fasta)
+
+ output:
+ tuple val(meta), path("*.short.ids.fasta") , emit: short_ids_fasta , optional: true // not produced when the template finds no ID needs changing
+ tuple val(meta), path("*.short.ids.tsv") , emit: short_ids_tsv , optional: true // not produced when the template finds no ID needs changing
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ prefix = task.ext.prefix ?: "${meta.id}" // NOTE(review): no `def` here, presumably so the template can resolve $prefix -- confirm
+ template 'shorten_fasta_ids.py'
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}" // NOTE(review): not referenced below; the stub writes versions.yml only
+ """
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ python: \$(python --version | cut -d' ' -f2)
+ biopython: \$(pip list | grep "biopython" | cut -d' ' -f3)
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/custom/shortenfastaids/meta.yml b/modules/pfr/custom/shortenfastaids/meta.yml
new file mode 100644
index 0000000..2425810
--- /dev/null
+++ b/modules/pfr/custom/shortenfastaids/meta.yml
@@ -0,0 +1,58 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "custom_shortenfastaids"
+description: |
+ Shortens fasta IDs and produces a new fasta along with a TSV table
+ consisting of original (first column) and new IDs (second column).
+ This module is helpful when some tools like EDTA implicitly shorten
+ the IDs without producing the ID map, leading to downstream mismatch
+ in IDs across files.
+keywords:
+ - genome
+ - fasta
+ - ID
+ - shorten
+tools:
+ - "biopython":
+ description: |
+ Biopython is a set of freely available tools for biological computation written in Python by
+ an international team of developers.
+ homepage: "https://biopython.org"
+ documentation: "https://biopython.org/wiki/Documentation"
+ tool_dev_url: "https://github.com/biopython/biopython"
+ doi: "10.1093/bioinformatics/btp163"
+ licence: ["Biopython License Agreement"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test' ]`
+ - fasta:
+ type: file
+ description: Input fasta file
+ pattern: "*.{fsa,fa,fasta}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test' ]`
+ - short_ids_fasta:
+ type: file
+ description: Fasta file with shortened ids if id change is required
+ pattern: "*.short.ids.fasta"
+ - short_ids_tsv:
+ type: file
+ description: |
+ A TSV file with original (first column) and new ids (second column)
+ if id change is required
+ pattern: "*.short.ids.tsv"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids_c97537f.py b/modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py
similarity index 55%
rename from modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids_c97537f.py
rename to modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py
index e5b62b3..54f35bf 100755
--- a/modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids_c97537f.py
+++ b/modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py
@@ -1,29 +1,22 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
import re
-import sys
from Bio import SeqIO
-
-# https://github.com/Plant-Food-Research-Open/assembly_qc
-# GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE
+from importlib.metadata import version
+from platform import python_version
# The input fasta file path
-fasta_file_path = sys.argv[1]
-
-# The prefix for output files: prefix.renamed.ids.fa, prefix.renamed.ids.tsv
-output_files_prefix = sys.argv[2]
-
-# In the case where IDs have acceptable character and no change is needed, the output is stdout:
-# "IDs have acceptable length and character. No change required."
+fasta_file_path = "$fasta"
+output_files_prefix = "$prefix"
-def extract_fasta_ids(fasta_file_path):
+def extract_fasta_ids_and_descriptions(fasta_file_path):
fasta_file_obj = SeqIO.parse(fasta_file_path, "fasta")
ids = []
for record in fasta_file_obj:
- ids.append(record.id)
+ ids.append((record.id, record.description))
return ids
@@ -41,29 +34,39 @@ def write_fasta_with_new_ids(fasta_file_path, id_mapping, file_prefix):
replaced_records.append(record)
- SeqIO.write(replaced_records, f"{file_prefix}.renamed.ids.fa", "fasta")
+ SeqIO.write(replaced_records, f"{file_prefix}.short.ids.fasta", "fasta")
-def write_fasta_without_comments(fasta_file_path, file_prefix):
- old_fasta_file_obj = SeqIO.parse(fasta_file_path, "fasta")
-
- replaced_records = []
- for record in old_fasta_file_obj:
- record.description = ""
- replaced_records.append(record)
-
- SeqIO.write(replaced_records, f"{file_prefix}.renamed.ids.fa", "fasta")
+def do_id_need_to_change(id_and_description, silent=False):
+ id = id_and_description[0]
+ description = id_and_description[1]
+ if len(id) > 13:
+ if not silent:
+ print(f"{id} has length greater than 13")
+ return True
+ if not re.match(r"^[a-zA-Z0-9_]+\$", id):
+ if not silent:
+ print(f"{id} does not match '^[a-zA-Z0-9_]+\$'")
+ return True
-def do_id_need_to_change(id):
- if len(id) > 13 or not re.match(r"^[a-zA-Z0-9_]+$", id):
+ if description != id and description != "":
+ if not silent:
+ print(f"{id} contains a comment: {description.replace(id, '')}")
return True
+ if not silent:
+ print(f"{id} is acceptable")
return False
-def do_ids_need_to_change(ids):
- return any([do_id_need_to_change(id) for id in ids])
+def do_ids_need_to_change(ids_and_descriptions, silent=False):
+ return any(
+ [
+ do_id_need_to_change(id_and_description, silent)
+ for id_and_description in ids_and_descriptions
+ ]
+ )
def extract_common_patterns(ids):
@@ -83,23 +86,25 @@ def extract_common_patterns(ids):
return {pattern: pattern[:3] for pattern in common_patterns}
-def shorten_ids(ids, patterns_dict):
+def shorten_ids(input_ids_and_descriptions, patterns_dict):
shortened_ids = []
- for id in ids:
- if not do_id_need_to_change(id):
+ for id_and_description in input_ids_and_descriptions:
+ id = id_and_description[0]
+ description = "" # Treat description as absent as it will be removed by write_fasta_with_new_ids
+ if not do_id_need_to_change((id, description), silent=True):
shortened_ids.append(id)
continue
shortened_id = shorten_id_by_pattern_replacement(patterns_dict, id)
- if not do_id_need_to_change(shortened_id):
+ if not do_id_need_to_change((shortened_id, description), silent=True):
shortened_ids.append(shortened_id)
continue
shortened_id = f"Ctg{generate_hash(id)}"
- if not do_id_need_to_change(shortened_id):
+ if not do_id_need_to_change((shortened_id, description), silent=True):
shortened_ids.append(shortened_id)
continue
@@ -149,24 +154,27 @@ def fail_if_new_ids_not_valid(ids):
if __name__ == "__main__":
- input_ids = extract_fasta_ids(fasta_file_path)
+ input_ids_and_descriptions = extract_fasta_ids_and_descriptions(fasta_file_path)
+ input_ids = [x[0] for x in input_ids_and_descriptions]
- if not do_ids_need_to_change(input_ids):
- print("IDs have acceptable length and character. No change required.")
-
- with open(f"{output_files_prefix}.renamed.ids.tsv", "w") as f:
- f.write("IDs have acceptable length and character. No change required.")
-
- write_fasta_without_comments(fasta_file_path, output_files_prefix)
+ # Write versions
+ with open(f"versions.yml", "w") as f_versions:
+ f_versions.write('"${task.process}":\\n')
+ f_versions.write(f" python: {python_version()}\\n")
+ f_versions.write(f" biopython: {version('biopython')}\\n")
+ if not do_ids_need_to_change(input_ids_and_descriptions):
+ print("IDs have acceptable length and character. No change required.")
exit(0)
- new_ids = shorten_ids(input_ids, extract_common_patterns(input_ids))
+ new_ids = shorten_ids(
+ input_ids_and_descriptions, extract_common_patterns(input_ids)
+ )
fail_if_new_ids_not_valid(new_ids)
- with open(f"{output_files_prefix}.renamed.ids.tsv", "w") as f:
+ with open(f"{output_files_prefix}.short.ids.tsv", "w") as f:
for input_id, new_id in zip(input_ids, new_ids):
- f.write(f"{input_id}\t{new_id}\n")
+ f.write(f"{input_id}\\t{new_id}\\n")
write_fasta_with_new_ids(
fasta_file_path, zip(input_ids, new_ids), output_files_prefix
diff --git a/modules/pfr/custom/shortenfastaids/tests/main.nf.test b/modules/pfr/custom/shortenfastaids/tests/main.nf.test
new file mode 100644
index 0000000..dc46bae
--- /dev/null
+++ b/modules/pfr/custom/shortenfastaids/tests/main.nf.test
@@ -0,0 +1,131 @@
+nextflow_process {
+
+ name "Test Process CUSTOM_SHORTENFASTAIDS"
+ script "../main.nf"
+ process "CUSTOM_SHORTENFASTAIDS"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "custom"
+ tag "custom/shortenfastaids"
+
+ test("homo_sapiens-genome_fasta-no_change") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert snapshot(process.out.versions).match("versions") },
+ { assert process.out.short_ids_fasta == [] },
+ { assert process.out.short_ids_tsv == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-genome_fasta-pattern_change") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("homo_sapiens-genome2_fasta-length_change") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['homo_sapiens']['genome']['genome2_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("custom_fasta-comment_change") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of('>Chr1 This is a test comment', 'AGCTAGCT')
+ | collectFile(name: 'sample.fasta', newLine: true)
+ | map { file -> [ [ id:'test' ], file ] }
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("versions") },
+ { assert process.out.short_ids_fasta == [] },
+ { assert process.out.short_ids_tsv == [] }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap b/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap
new file mode 100644
index 0000000..8fed1b9
--- /dev/null
+++ b/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap
@@ -0,0 +1,170 @@
+{
+ "custom_fasta-comment_change": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.fasta:md5,c861b9d46a4d9bdba66953cff572fc5d"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.tsv:md5,8762f2bffbdff75c2812bad72ba52bba"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+ ],
+ "short_ids_fasta": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.fasta:md5,c861b9d46a4d9bdba66953cff572fc5d"
+ ]
+ ],
+ "short_ids_tsv": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.tsv:md5,8762f2bffbdff75c2812bad72ba52bba"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+ ]
+ }
+ ],
+ "timestamp": "2023-12-07T13:33:05.523745"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+ ]
+ ],
+ "timestamp": "2023-12-07T13:30:30.361527"
+ },
+ "homo_sapiens-genome_fasta-no_change": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+
+ ],
+ "2": [
+ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+ ],
+ "short_ids_fasta": [
+
+ ],
+ "short_ids_tsv": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+ ]
+ }
+ ],
+ "timestamp": "2023-12-07T13:32:54.220188"
+ },
+ "homo_sapiens-genome2_fasta-length_change": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.fasta:md5,1382acd98d4cd233a8062ef01b2aaa6d"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.tsv:md5,99c0f2a529cb595b2d8530024ed2880e"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+ ],
+ "short_ids_fasta": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.fasta:md5,1382acd98d4cd233a8062ef01b2aaa6d"
+ ]
+ ],
+ "short_ids_tsv": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.tsv:md5,99c0f2a529cb595b2d8530024ed2880e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+ ]
+ }
+ ],
+ "timestamp": "2023-12-07T13:33:01.924483"
+ },
+ "sarscov2-genome_fasta-pattern_change": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.fasta:md5,14d6f587b6d28889c5c0f985e78d602f"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.tsv:md5,d7a2af88e8549586e5616bff6a88bd71"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+ ],
+ "short_ids_fasta": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.fasta:md5,14d6f587b6d28889c5c0f985e78d602f"
+ ]
+ ],
+ "short_ids_tsv": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.tsv:md5,d7a2af88e8549586e5616bff6a88bd71"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+ ]
+ }
+ ],
+ "timestamp": "2023-12-07T13:32:58.12885"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/custom/shortenfastaids/tests/tags.yml b/modules/pfr/custom/shortenfastaids/tests/tags.yml
new file mode 100644
index 0000000..4715b64
--- /dev/null
+++ b/modules/pfr/custom/shortenfastaids/tests/tags.yml
@@ -0,0 +1,2 @@
+custom/shortenfastaids:
+ - "modules/pfr/custom/shortenfastaids/**"
diff --git a/modules/pfr/edta/edta/environment.yml b/modules/pfr/edta/edta/environment.yml
new file mode 100644
index 0000000..63160e8
--- /dev/null
+++ b/modules/pfr/edta/edta/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "edta_edta"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::edta=2.1.0"
diff --git a/modules/pfr/edta/edta/main.nf b/modules/pfr/edta/edta/main.nf
new file mode 100644
index 0000000..a81c528
--- /dev/null
+++ b/modules/pfr/edta/edta/main.nf
@@ -0,0 +1,93 @@
+process EDTA_EDTA { // Runs EDTA.pl on a genome FASTA and renames its results to ${prefix}.*
+ tag "$meta.id"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/edta:2.1.0--hdfd78af_1':
+ 'biocontainers/edta:2.1.0--hdfd78af_1' }"
+
+ input:
+ tuple val(meta), path(fasta)
+ path cds // passed as --cds only when non-empty
+ path curatedlib // passed as --curatedlib only when non-empty
+ path rmout // passed as --rmout only when non-empty
+ path exclude // passed as --exclude only when non-empty
+
+ output:
+ tuple val(meta), path('*.log') , emit: log
+ tuple val(meta), path('*.EDTA.TElib.fa') , emit: te_lib_fasta
+ tuple val(meta), path('*.EDTA.pass.list') , emit: pass_list , optional: true
+ tuple val(meta), path('*.EDTA.out') , emit: out_file , optional: true
+ tuple val(meta), path('*.EDTA.TEanno.gff3') , emit: te_anno_gff3 , optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def mod_file_name = "${fasta}.mod" // base name of the EDTA result files that are moved below
+ def cds_file = cds ? "--cds $cds" : ''
+ def curatedlib_file = curatedlib ? "--curatedlib $curatedlib": ''
+ def rmout_file = rmout ? "--rmout $rmout" : ''
+ def exclude_file = exclude ? "--exclude $exclude" : ''
+ """
+ EDTA.pl \\
+ --genome $fasta \\
+ --threads $task.cpus \\
+ $cds_file \\
+ $curatedlib_file \\
+ $rmout_file \\
+ $exclude_file \\
+ $args \\
+ &> >(tee "${prefix}.log" 2>&1)
+
+ mv \\
+ "${mod_file_name}.EDTA.TElib.fa" \\
+ "${prefix}.EDTA.TElib.fa"
+
+ [ -f "${mod_file_name}.EDTA.raw/LTR/${mod_file_name}.pass.list" ] \\
+ && mv \\
+ "${mod_file_name}.EDTA.raw/LTR/${mod_file_name}.pass.list" \\
+ "${prefix}.EDTA.pass.list" \\
+ || echo "EDTA did not produce a pass.list file"
+
+ [ -f "${mod_file_name}.EDTA.anno/${mod_file_name}.out" ] \\
+ && mv \\
+ "${mod_file_name}.EDTA.anno/${mod_file_name}.out" \\
+ "${prefix}.EDTA.out" \\
+ || echo "EDTA did not produce an out file"
+
+ [ -f "${mod_file_name}.EDTA.TEanno.gff3" ] \\
+ && mv \\
+ "${mod_file_name}.EDTA.TEanno.gff3" \\
+ "${prefix}.EDTA.TEanno.gff3" \\
+ || echo "EDTA did not produce a TEanno gff3 file"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def touch_pass_list = args.contains("--anno 1") ? "touch ${prefix}.EDTA.pass.list" : '' // optional outputs are touched only when args contains "--anno 1"
+ def touch_out_file = args.contains("--anno 1") ? "touch ${prefix}.EDTA.out" : ''
+ def touch_te_anno = args.contains("--anno 1") ? "touch ${prefix}.EDTA.TEanno.gff3": ''
+ """
+ touch "${prefix}.log"
+ touch "${prefix}.EDTA.TElib.fa"
+ $touch_pass_list
+ $touch_out_file
+ $touch_te_anno
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/edta/edta/meta.yml b/modules/pfr/edta/edta/meta.yml
new file mode 100644
index 0000000..52503b8
--- /dev/null
+++ b/modules/pfr/edta/edta/meta.yml
@@ -0,0 +1,82 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "edta_edta"
+description: Extensive de-novo TE Annotator (EDTA)
+keywords:
+ - genome
+ - repeat
+ - annotation
+ - transposable-elements
+tools:
+ - "edta":
+ description: Extensive de-novo TE Annotator (EDTA)
+ homepage: "https://github.com/oushujun/EDTA"
+ documentation: "https://github.com/oushujun/EDTA"
+ tool_dev_url: "https://github.com/oushujun/EDTA"
+ doi: "10.1186/s13059-019-1905-y"
+ licence: ["GPL v3"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test' ]`
+ - fasta:
+ type: file
+ description: Genome fasta file
+ pattern: "*.{fsa,fa,fasta}"
+ - cds:
+ type: file
+ description: |
+ A FASTA file containing the coding sequence (no introns, UTRs, nor TEs)
+ of this genome or its close relative
+ pattern: "*.{fsa,fa,fasta}"
+ - curatedlib:
+ type: file
+ description: |
+ A curated library to keep consistent naming and classification for known TEs
+ pattern: "*.liban"
+ - rmout:
+ type: file
+ description: |
+ Homology-based TE annotation, in RepeatMasker .out format, to use for masking
+ instead of the EDTA library
+ pattern: "*.out"
+ - exclude:
+ type: file
+ description: Exclude regions (bed format) from TE masking in the MAKER.masked output
+ pattern: "*.bed"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test' ]`
+ - log:
+ type: file
+ description: Log emitted by EDTA
+ pattern: "*.log"
+ - te_lib_fasta:
+ type: file
+ description: A non-redundant TE library in fasta format
+ pattern: "*.EDTA.TElib.fa"
+ - pass_list:
+ type: file
+ description: A summary table of intact LTR-RTs with coordinate and structural information
+ pattern: "*.EDTA.pass.list"
+ - out_file:
+ type: file
+ description: RepeatMasker annotation of all LTR sequences in the genome
+ pattern: "*.EDTA.out"
+ - te_anno_gff3:
+ type: file
+ description: A gff3 file containing both structurally intact and fragmented TE annotations
+ pattern: "*.EDTA.TEanno.gff3"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/edta/edta/tests/main.nf.test b/modules/pfr/edta/edta/tests/main.nf.test
new file mode 100644
index 0000000..3aed0a2
--- /dev/null
+++ b/modules/pfr/edta/edta/tests/main.nf.test
@@ -0,0 +1,77 @@
+nextflow_process {
+
+ name "Test Process EDTA_EDTA"
+ script "../main.nf"
+ process "EDTA_EDTA"
+ config "./nextflow.config"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "edta"
+ tag "edta/edta"
+
+ test("homo_sapiens-genome_fasta") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of(file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true))
+ | map { f ->
+ (
+ ['>Chr21'] +
+ f.readLines().subList(66666.toInteger(), 116666.toInteger()) // 4 MB to 7 MB; 60 bases per line
+ ).join('\\n')
+ }
+ | collectFile(name: 'genome_3_to_10_mb.fasta')
+ | map { f -> [ [ id: 'test'], f ] }
+ input[1] = []
+ input[2] = []
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[1] = []
+ input[2] = []
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("versions") },
+ { assert process.out.log != null },
+ { assert process.out.te_lib_fasta != null },
+ { assert process.out.pass_list != null },
+ { assert process.out.out_file != null },
+ { assert process.out.te_anno_gff3 != null }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/edta/edta/tests/nextflow.config b/modules/pfr/edta/edta/tests/nextflow.config
new file mode 100644
index 0000000..e58e10e
--- /dev/null
+++ b/modules/pfr/edta/edta/tests/nextflow.config
@@ -0,0 +1,3 @@
+process {
+ ext.args = '--anno 1'
+}
diff --git a/modules/pfr/edta/edta/tests/tags.yml b/modules/pfr/edta/edta/tests/tags.yml
new file mode 100644
index 0000000..180ae6d
--- /dev/null
+++ b/modules/pfr/edta/edta/tests/tags.yml
@@ -0,0 +1,2 @@
+edta/edta:
+ - "modules/pfr/edta/edta/**"
diff --git a/modules/pfr/lai/environment.yml b/modules/pfr/lai/environment.yml
new file mode 100644
index 0000000..94fadbd
--- /dev/null
+++ b/modules/pfr/lai/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "lai"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::LTR_retriever=2.9.0"
diff --git a/modules/pfr/lai/main.nf b/modules/pfr/lai/main.nf
new file mode 100644
index 0000000..d4fced9
--- /dev/null
+++ b/modules/pfr/lai/main.nf
@@ -0,0 +1,69 @@
+process LAI { // Runs the LAI executable on a genome FASTA; emits its log, an optional *.LAI.out and versions.yml
+ tag "$meta.id"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/ltr_retriever:2.9.0--hdfd78af_2':
+ 'biocontainers/ltr_retriever:2.9.0--hdfd78af_2' }"
+
+ input:
+ tuple val(meta), path(fasta)
+ path pass_list // forwarded to LAI as -intact
+ path annotation_out // forwarded to LAI as -all
+ path monoploid_seqs // optional; when non-empty it is forwarded to LAI as -mono
+
+ output:
+ tuple val(meta), path("*.LAI.log") , emit: log
+ tuple val(meta), path("*.LAI.out") , emit: lai_out , optional: true // optional: the script tolerates a missing LAI result file
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def monoploid_param = monoploid_seqs ? "-mono $monoploid_seqs" : '' // empty string when no monoploid list was supplied
+ def lai_output_name = monoploid_seqs ? "${annotation_out}.${monoploid_seqs}.out.LAI" : "${annotation_out}.LAI" // file name the mv below expects LAI to have written
+ """
+ # Remove comments from genome fasta,
+ # otherwise LAI triggers its sequence name change logic
+
+ sed \\
+ '/^>/ s/\\s.*\$//' \\
+ $fasta \\
+ > for_lai_no_comments.fsa
+
+ LAI \\
+ -genome for_lai_no_comments.fsa \\
+ -intact $pass_list \\
+ -all $annotation_out \\
+ -t $task.cpus \\
+ $monoploid_param \\
+ $args \\
+ > "${prefix}.LAI.log"
+
+ mv \\
+ $lai_output_name \\
+ "${prefix}.LAI.out" \\
+ || echo "LAI did not produce the output file"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ lai: \$(cat /usr/local/share/LTR_retriever/LAI | grep "my \\\$version" | sed 's/my \$version="//; s/";//')
+ END_VERSIONS
+ """
+
+ stub: // creates only the log placeholder and versions.yml; no *.LAI.out is touched
+ def args = task.ext.args ?: '' // NOTE(review): args is not referenced in the stub script below
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.LAI.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ lai: \$(cat /usr/local/share/LTR_retriever/LAI | grep "my \\\$version" | sed 's/my \$version="//; s/";//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/lai/meta.yml b/modules/pfr/lai/meta.yml
new file mode 100644
index 0000000..6fd7aef
--- /dev/null
+++ b/modules/pfr/lai/meta.yml
@@ -0,0 +1,68 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "lai"
+description: Estimates the mean LTR sequence identity in the genome
+keywords:
+ - genomics
+ - annotation
+ - repeat
+ - long terminal retrotransposon
+ - retrotransposon
+ - stats
+ - qc
+tools:
+ - "lai":
+ description: Assessing genome assembly quality using the LTR Assembly Index (LAI)
+ homepage: "https://github.com/oushujun/LTR_retriever"
+ documentation: "https://github.com/oushujun/LTR_retriever"
+ tool_dev_url: "https://github.com/oushujun/LTR_retriever"
+ doi: "10.1093/nar/gky730"
+ licence: ["GPL v3"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - fasta:
+ type: file
+ description: The genome file that is used to generate everything
+ pattern: "*.{fsa,fa,fasta}"
+ - pass_list:
+ type: file
+ description: A list of intact LTR-RTs generated by LTR_retriever
+ pattern: "*.pass.list"
+ - annotation_out:
+ type: file
+ description: RepeatMasker annotation of all LTR sequences in the genome
+ pattern: "*.out"
+ - monoploid_seqs:
+ type: file
+ description: |
+ This parameter is mainly for polyploid genomes. User provides a list of
+ sequence names that represent a monoploid (1x). LAI will be calculated only
+ on these sequences if provided.
+ pattern: "*.txt"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1', single_end:false ]`
+ - log:
+ type: file
+ description: Log from LAI
+ pattern: "*.LAI.log"
+ - lai_out:
+ type: file
+ description: |
+ Output file from LAI if LAI is able to estimate the index from the inputs
+ pattern: "*.LAI.out"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/lai/tests/main.nf.test b/modules/pfr/lai/tests/main.nf.test
new file mode 100644
index 0000000..353043c
--- /dev/null
+++ b/modules/pfr/lai/tests/main.nf.test
@@ -0,0 +1,120 @@
+nextflow_process {
+
+ name "Test Process LAI"
+ script "../main.nf"
+ process "LAI"
+ config "./nextflow.config"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "lai"
+ tag "gt/suffixerator"
+ tag "nf-core/gunzip"
+ tag "gt/ltrharvest"
+ tag "ltrretriever"
+
+ test("homo_sapiens-genome_21_fasta-success") {
+
+ setup {
+ run("GUNZIP") { // decompress the shared gzipped chr1 test FASTA; no machine-specific paths
+ script "../../../nf-core/gunzip"
+
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['homo_sapiens']['genome']['genome_1_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ run("GT_SUFFIXERATOR") {
+ script "../../../pfr/gt/suffixerator"
+
+ process {
+ """
+ input[0] = GUNZIP.out.gunzip
+ """
+ }
+ }
+
+ run("GT_LTRHARVEST") {
+ script "../../../pfr/gt/ltrharvest"
+
+ process {
+ """
+ input[0] = GT_SUFFIXERATOR.out.index
+ """
+ }
+ }
+
+ run("LTRRETRIEVER") {
+ script "../../../pfr/ltrretriever"
+
+ process {
+ """
+ input[0] = GUNZIP.out.gunzip
+ input[1] = GT_LTRHARVEST.out.tabout.map { meta, tabout -> tabout }
+ input[2] = []
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = GUNZIP.out.gunzip
+ input[1] = LTRRETRIEVER.out.pass_list.map { meta, pass_list -> pass_list }
+ input[2] = LTRRETRIEVER.out.annotation_out.map { meta, annotation_out -> annotation_out }
+ input[3] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("Dependency checking: Passed!") },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("Calculate LAI:") },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("Total LTR sequence content (0%) is too low for accurate LAI calculation") },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("Sorry, LAI is not applicable on the current genome assembly.") },
+ { assert process.out.lai_out == [] },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
+ ]
+ input[1] = []
+ input[2] = []
+ input[3] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/lai/tests/main.nf.test.snap b/modules/pfr/lai/tests/main.nf.test.snap
new file mode 100644
index 0000000..751ddb6
--- /dev/null
+++ b/modules/pfr/lai/tests/main.nf.test.snap
@@ -0,0 +1,10 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,2ac93e1e6324236af6f9a794bbac2099"
+ ]
+ ],
+ "timestamp": "2023-12-05T12:15:32.969684"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/lai/tests/nextflow.config b/modules/pfr/lai/tests/nextflow.config
new file mode 100644
index 0000000..516a3e2
--- /dev/null
+++ b/modules/pfr/lai/tests/nextflow.config
@@ -0,0 +1,10 @@
+process {
+
+ withName: GT_SUFFIXERATOR {
+ ext.args = '-tis -suf -lcp -des -ssp -sds -dna'
+ }
+
+ withName: GT_LTRHARVEST {
+ ext.args = '-minlenltr 100 -maxlenltr 7000 -mintsd 4 -maxtsd 6 -motif TGCA -motifmis 1 -similar 85 -vic 10 -seed 20 -seqids yes'
+ }
+}
diff --git a/modules/pfr/lai/tests/tags.yml b/modules/pfr/lai/tests/tags.yml
new file mode 100644
index 0000000..252295d
--- /dev/null
+++ b/modules/pfr/lai/tests/tags.yml
@@ -0,0 +1,2 @@
+lai:
+ - "modules/pfr/lai/**"
diff --git a/modules/pfr/liftoff/environment.yml b/modules/pfr/liftoff/environment.yml
new file mode 100644
index 0000000..8761c9b
--- /dev/null
+++ b/modules/pfr/liftoff/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "liftoff"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::liftoff=1.6.3"
diff --git a/modules/pfr/liftoff/main.nf b/modules/pfr/liftoff/main.nf
new file mode 100644
index 0000000..317eca1
--- /dev/null
+++ b/modules/pfr/liftoff/main.nf
@@ -0,0 +1,62 @@
+process LIFTOFF { // Runs liftoff with a reference assembly and its annotation against a target assembly
+ tag "$meta.id"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/liftoff:1.6.3--pyhdfd78af_0':
+ 'biocontainers/liftoff:1.6.3--pyhdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(target_fa)
+ path ref_fa, name: 'ref_assembly.fa' // staged under a fixed name; the script passes 'ref_assembly.fa' literally
+ path ref_annotation
+
+ output:
+ tuple val(meta), path("${prefix}.gff3") , emit: gff3
+ tuple val(meta), path("*.polished.gff3") , emit: polished_gff3, optional: true // optional: the rename in the script tolerates a missing polished file
+ tuple val(meta), path("*.unmapped.txt") , emit: unmapped_txt
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}" // assigned without 'def'; the output block above interpolates ${prefix}
+ """
+ liftoff \\
+ -g $ref_annotation \\
+ -p $task.cpus \\
+ -o "${prefix}.gff3" \\
+ -u "${prefix}.unmapped.txt" \\
+ $args \\
+ $target_fa \\
+ ref_assembly.fa
+
+ mv \\
+ "${prefix}.gff3_polished" \\
+ "${prefix}.polished.gff3" \\
+ || echo "-polish is absent"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ liftoff: \$(liftoff --version 2> /dev/null)
+ END_VERSIONS
+ """
+
+ stub: // touches placeholder outputs instead of running the liftoff mapping
+ def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}" // assigned without 'def'; the output block above interpolates ${prefix}
+ def touch_polished = args.contains('-polish') ? "touch ${prefix}.polished.gff3" : '' // polished placeholder only when ext.args contains -polish
+ """
+ touch "${prefix}.gff3"
+ touch "${prefix}.unmapped.txt"
+ $touch_polished
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ liftoff: \$(liftoff --version 2> /dev/null)
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/liftoff/meta.yml b/modules/pfr/liftoff/meta.yml
new file mode 100644
index 0000000..46b3c58
--- /dev/null
+++ b/modules/pfr/liftoff/meta.yml
@@ -0,0 +1,66 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "liftoff"
+description: |
+ Uses Liftoff to accurately map annotations in GFF or GTF between assemblies of the same,
+ or closely-related species
+keywords:
+ - genome
+ - annotation
+ - gff3
+ - gtf
+ - liftover
+tools:
+ - "liftoff":
+ description: |
+ Liftoff is a tool that accurately maps annotations in GFF or GTF between assemblies of the same,
+ or closely-related species
+ homepage: "https://github.com/agshumate/Liftoff"
+ documentation: "https://github.com/agshumate/Liftoff"
+ tool_dev_url: "https://github.com/agshumate/Liftoff"
+ doi: "10.1093/bioinformatics/btaa1016"
+ licence: ["GPL v3 License"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test' ]`
+ - target_fa:
+ type: file
+ description: Target assembly in fasta format
+ pattern: "*.{fsa,fa,fasta}"
+ - ref_fa:
+ type: file
+ description: Reference assembly in fasta format
+ pattern: "*.{fsa,fa,fasta}"
+ - ref_annotation:
+ type: file
+ description: Reference assembly annotations in gtf or gff3 format
+ pattern: "*.{gtf,gff3}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test' ]`
+ - gff3:
+ type: file
+ description: Lifted annotations for the target assembly in gff3 format
+ pattern: "*.gff3"
+ - polished_gff3:
+ type: file
+ description: Polished lifted annotations for the target assembly in gff3 format
+ pattern: "*.polished.gff3"
+ - unmapped_txt:
+ type: file
+ description: List of unmapped reference annotations
+ pattern: "*.unmapped.txt"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/liftoff/tests/main.nf.test b/modules/pfr/liftoff/tests/main.nf.test
new file mode 100644
index 0000000..272c882
--- /dev/null
+++ b/modules/pfr/liftoff/tests/main.nf.test
@@ -0,0 +1,119 @@
+nextflow_process {
+
+ name "Test Process LIFTOFF"
+ script "../main.nf"
+ process "LIFTOFF"
+ config "./nextflow.config"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "nf-core/gunzip"
+ tag "liftoff"
+
+ test("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf") {
+
+ setup {
+ run("GUNZIP") {
+ script "../../../nf-core/gunzip"
+
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['homo_sapiens']['genome']['genome_1_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
+ ]
+ input[1] = GUNZIP.out.gunzip.map { meta, file -> file }
+ input[2] = [
+ file(params.test_data['homo_sapiens']['genome']['genome_1_gtf'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.unmapped_txt).match("unmapped_txt") },
+ { assert file(process.out.gff3[0][1]).text.contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") },
+ { assert file(process.out.polished_gff3[0][1]).text.contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") },
+ {
+ assert snapshot(
+ (
+ [process.out.gff3[0][0].toString()] + // meta
+ process.out.gff3.collect { file(it[1]).getName() } +
+ process.out.polished_gff3.collect { file(it[1]).getName() } +
+ process.out.unmapped_txt.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-stub") {
+ options '-stub'
+
+ setup {
+ run("GUNZIP") {
+ script "../../../nf-core/gunzip"
+
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['homo_sapiens']['genome']['genome_1_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
+ ]
+ input[1] = GUNZIP.out.gunzip.map { meta, file -> file }
+ input[2] = [
+ file(params.test_data['homo_sapiens']['genome']['genome_1_gtf'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.gff3[0][0].toString()] + // meta
+ process.out.gff3.collect { file(it[1]).getName() } +
+ process.out.polished_gff3.collect { file(it[1]).getName() } +
+ process.out.unmapped_txt.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/liftoff/tests/main.nf.test.snap b/modules/pfr/liftoff/tests/main.nf.test.snap
new file mode 100644
index 0000000..baa4d70
--- /dev/null
+++ b/modules/pfr/liftoff/tests/main.nf.test.snap
@@ -0,0 +1,34 @@
+{
+ "unmapped_txt": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.unmapped.txt:md5,7391d10df6e15db356b084c9af5259e4"
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-01T13:57:40.748507"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,205d9c609e7fe27d8199550d842bdce8"
+ ]
+ ],
+ "timestamp": "2023-12-01T13:57:40.752414"
+ },
+ "homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-for_stub_match": {
+ "content": [
+ [
+ "test.gff3",
+ "test.polished.gff3",
+ "test.unmapped.txt",
+ "{id=test}"
+ ]
+ ],
+ "timestamp": "2023-12-21T15:20:04.816416"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/liftoff/tests/nextflow.config b/modules/pfr/liftoff/tests/nextflow.config
new file mode 100644
index 0000000..06b9d76
--- /dev/null
+++ b/modules/pfr/liftoff/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: LIFTOFF {
+ ext.args = '-exclude_partial -copies -polish -a 0.1 -s 0.1'
+ }
+}
diff --git a/modules/pfr/liftoff/tests/tags.yml b/modules/pfr/liftoff/tests/tags.yml
new file mode 100644
index 0000000..4ae1fb0
--- /dev/null
+++ b/modules/pfr/liftoff/tests/tags.yml
@@ -0,0 +1,2 @@
+liftoff:
+ - "modules/pfr/liftoff/**"
diff --git a/modules/pfr/repeatmodeler/builddatabase/environment.yml b/modules/pfr/repeatmodeler/builddatabase/environment.yml
new file mode 100644
index 0000000..ecc282e
--- /dev/null
+++ b/modules/pfr/repeatmodeler/builddatabase/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "repeatmodeler_builddatabase"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::repeatmodeler=2.0.5"
diff --git a/modules/pfr/repeatmodeler/builddatabase/main.nf b/modules/pfr/repeatmodeler/builddatabase/main.nf
new file mode 100644
index 0000000..486e25d
--- /dev/null
+++ b/modules/pfr/repeatmodeler/builddatabase/main.nf
@@ -0,0 +1,50 @@
+process REPEATMODELER_BUILDDATABASE { // Runs BuildDatabase on a FASTA and emits every file named ${prefix}.*
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/repeatmodeler:2.0.5--pl5321hdfd78af_0':
+ 'biocontainers/repeatmodeler:2.0.5--pl5321hdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(fasta)
+
+ output:
+ tuple val(meta), path("${prefix}.*") , emit: db // glob over all files sharing the database name
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ prefix = task.ext.prefix ?: "${meta.id}" // assigned without 'def'; the output block above interpolates ${prefix}
+ """
+ BuildDatabase \\
+ -name $prefix \\
+ $fasta
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||')
+ END_VERSIONS
+ """
+
+ stub: // touches eight placeholder files named ${prefix}.<ext>
+ prefix = task.ext.prefix ?: "${meta.id}" // assigned without 'def'; the output block above interpolates ${prefix}
+ """
+ touch ${prefix}.nhr
+ touch ${prefix}.nin
+ touch ${prefix}.njs
+ touch ${prefix}.nnd
+ touch ${prefix}.nni
+ touch ${prefix}.nog
+ touch ${prefix}.nsq
+ touch ${prefix}.translation
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||')
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/repeatmodeler/builddatabase/meta.yml b/modules/pfr/repeatmodeler/builddatabase/meta.yml
new file mode 100644
index 0000000..d3aa931
--- /dev/null
+++ b/modules/pfr/repeatmodeler/builddatabase/meta.yml
@@ -0,0 +1,44 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "repeatmodeler_builddatabase"
+description: Create a database for RepeatModeler
+keywords:
+ - genomics
+ - fasta
+ - repeat
+tools:
+ - "repeatmodeler":
+ description: "RepeatModeler is a de-novo repeat family identification and modeling package."
+ homepage: "https://github.com/Dfam-consortium/RepeatModeler"
+ documentation: "https://github.com/Dfam-consortium/RepeatModeler"
+ tool_dev_url: "https://github.com/Dfam-consortium/RepeatModeler"
+ licence: ["Open Software License v2.1"]
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - fasta:
+ type: file
+ description: Fasta file
+ pattern: "*.{fasta,fsa,fa}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - db:
+ type: file
+ description: Database files for repeatmodeler
+ pattern: "`${prefix}.*`"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test
new file mode 100644
index 0000000..616f88c
--- /dev/null
+++ b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test
@@ -0,0 +1,60 @@
+nextflow_process {
+
+ name "Test Process REPEATMODELER_BUILDDATABASE"
+ script "../main.nf"
+ process "REPEATMODELER_BUILDDATABASE"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "repeatmodeler"
+ tag "repeatmodeler/builddatabase"
+
+ test("sarscov2-genome_fasta") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("versions") },
+ { assert snapshot(process.out.db[0][1].collect { file(it).name }.sort().toString()).match("for-stub-match") }
+ )
+ }
+
+ }
+
+ test("sarscov2-genome_fasta-stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("versions") },
+ { assert snapshot(process.out.db[0][1].collect { file(it).name }.sort().toString()).match("for-stub-match") }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap
new file mode 100644
index 0000000..cda327e
--- /dev/null
+++ b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap
@@ -0,0 +1,16 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,7944637266bc3e2726899eaad5e46c87"
+ ]
+ ],
+ "timestamp": "2024-01-09T15:14:48.807063"
+ },
+ "for-stub-match": {
+ "content": [
+ "[test.nhr, test.nin, test.njs, test.nnd, test.nni, test.nog, test.nsq, test.translation]"
+ ],
+ "timestamp": "2024-01-09T15:14:48.81702"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/tags.yml b/modules/pfr/repeatmodeler/builddatabase/tests/tags.yml
new file mode 100644
index 0000000..426540d
--- /dev/null
+++ b/modules/pfr/repeatmodeler/builddatabase/tests/tags.yml
@@ -0,0 +1,2 @@
+repeatmodeler/builddatabase:
+ - "modules/pfr/repeatmodeler/builddatabase/**"
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/environment.yml b/modules/pfr/repeatmodeler/repeatmodeler/environment.yml
new file mode 100644
index 0000000..2422071
--- /dev/null
+++ b/modules/pfr/repeatmodeler/repeatmodeler/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "repeatmodeler_repeatmodeler"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::repeatmodeler=2.0.5"
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/main.nf b/modules/pfr/repeatmodeler/repeatmodeler/main.nf
new file mode 100644
index 0000000..34df322
--- /dev/null
+++ b/modules/pfr/repeatmodeler/repeatmodeler/main.nf
@@ -0,0 +1,54 @@
+process REPEATMODELER_REPEATMODELER { // Runs RepeatModeler on a staged database and renames its three result files to ${prefix}.*
+ tag "$meta.id"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/repeatmodeler:2.0.5--pl5321hdfd78af_0':
+ 'biocontainers/repeatmodeler:2.0.5--pl5321hdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(db)
+
+ output:
+ tuple val(meta), path("*.fa") , emit: fasta
+ tuple val(meta), path("*.stk") , emit: stk
+ tuple val(meta), path("*.log") , emit: log
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def db_name = file(db[0]).getBaseName() // database name = base name of the first staged db file
+ """
+ RepeatModeler \\
+ -database $db_name \\
+ $args \\
+ -threads $task.cpus
+
+ mv ${db_name}-families.fa ${prefix}.fa
+ mv ${db_name}-families.stk ${prefix}.stk
+ mv ${db_name}-rmod.log ${prefix}.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||')
+ END_VERSIONS
+ """
+
+ stub: // touches empty ${prefix}.fa/.stk/.log placeholders
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.fa
+ touch ${prefix}.stk
+ touch ${prefix}.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||')
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/meta.yml b/modules/pfr/repeatmodeler/repeatmodeler/meta.yml
new file mode 100644
index 0000000..29bb795
--- /dev/null
+++ b/modules/pfr/repeatmodeler/repeatmodeler/meta.yml
@@ -0,0 +1,52 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "repeatmodeler_repeatmodeler"
+description: Performs de novo transposable element (TE) family identification with RepeatModeler
+keywords:
+ - genomics
+ - fasta
+ - repeat
+ - transposable element
+tools:
+ - "repeatmodeler":
+ description: "RepeatModeler is a de-novo repeat family identification and modeling package."
+ homepage: "https://github.com/Dfam-consortium/RepeatModeler"
+ documentation: "https://github.com/Dfam-consortium/RepeatModeler"
+ tool_dev_url: "https://github.com/Dfam-consortium/RepeatModeler"
+ licence: ["Open Software License v2.1"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - db:
+ type: file
+ description: RepeatModeler database files generated with REPEATMODELER_BUILDDATABASE
+ pattern: "*"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - fasta:
+ type: file
+ description: Consensus repeat sequences
+ pattern: "*.fa"
+ - stk:
+ type: file
+ description: Seed alignments
+ pattern: "*.stk"
+ - log:
+ type: file
+ description: A summarized log of the run
+ pattern: "*.log"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test
new file mode 100644
index 0000000..78b7957
--- /dev/null
+++ b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test
@@ -0,0 +1,92 @@
+nextflow_process {
+
+ name "Test Process REPEATMODELER_REPEATMODELER"
+ script "../main.nf"
+ process "REPEATMODELER_REPEATMODELER"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "repeatmodeler"
+ tag "repeatmodeler/repeatmodeler"
+ tag "repeatmodeler/builddatabase"
+
+ test("homo_sapiens-genome_fasta") {
+
+ setup {
+ run("REPEATMODELER_BUILDDATABASE") {
+ script "../../../../pfr/repeatmodeler/builddatabase"
+
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = REPEATMODELER_BUILDDATABASE.out.db
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.fasta).match("fasta") },
+ { assert snapshot(process.out.stk).match("stk") },
+ { assert file(process.out.log[0][1]).text.contains('1 families discovered.') },
+ { assert snapshot(process.out.versions).match("versions") },
+ {
+ assert snapshot(
+ (
+ process.out.fasta.collect { file(it[1]).getName() } +
+ process.out.stk.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("for-stub-match")
+ }
+ )
+ }
+
+ }
+
+ test("homo_sapiens-genome_fasta-stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("versions") },
+ {
+ assert snapshot(
+ (
+ process.out.fasta.collect { file(it[1]).getName() } +
+ process.out.stk.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("for-stub-match")
+ }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap
new file mode 100644
index 0000000..051dd60
--- /dev/null
+++ b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap
@@ -0,0 +1,46 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,1bb6846ecf1304c262eaef4d3de60cf9"
+ ]
+ ],
+ "timestamp": "2024-01-09T15:06:55.753492"
+ },
+ "stk": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.stk:md5,acd01ad35763c11315e2297a4f051d57"
+ ]
+ ]
+ ],
+ "timestamp": "2024-01-09T15:06:55.740963"
+ },
+ "for-stub-match": {
+ "content": [
+ [
+ "test.fa",
+ "test.log",
+ "test.stk"
+ ]
+ ],
+ "timestamp": "2024-01-09T15:06:55.759971"
+ },
+ "fasta": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.fa:md5,e25326771341204e1f8054d9529411e5"
+ ]
+ ]
+ ],
+ "timestamp": "2024-01-09T15:06:55.737658"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml b/modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml
new file mode 100644
index 0000000..648cc93
--- /dev/null
+++ b/modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml
@@ -0,0 +1,2 @@
+repeatmodeler/repeatmodeler:
+ - "modules/pfr/repeatmodeler/repeatmodeler/**"
diff --git a/nextflow.config b/nextflow.config
index a624175..409da80 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,80 +1,87 @@
includeConfig './conf/base.config'
params {
- // FASTA files (fasta, fasta.gz) for the assemblies to annotate
- //
- // Pattern: [["tag", "file path"]]
- // Permissible tags: tag, tag_1, tag_tag2_3, tag_tag2_tag3;
- // Any name with alphanumeric characters including "_".
- // "." is not allowed in the tag name
- // Unique, short tags are recommended.
- // Otherwise, some of the plots in the report may not display correctly.
- // Examples:
- // target_assemblies = [["tag1", "./a/relative/path/to/the/fasta/file.fasta"],
- // ["tag2", "./a/relative/path/to/the/fasta/file2.fasta"],
- // ["tag3", "https://ftp.ncbi.nlm.nih.gov/genomes/test_genome.fna"], ...]
- // target_assemblies = [["tair10", "/an/absolute/path/to/the/fasta/file.fasta"]]
target_assemblies = [
- ["red5_v2p1", "/workspace/hrauxr/pan-gene/.test/red5_v2p1_chr1.fasta"]
+ ["red5_v2p1", "/workspace/pangene/test_data/red5_v2p1_chr1.fasta"],
+ ["donghong", "/workspace/pangene/test_data/donghong.chr1.fsa.gz"]
]
+ // Pattern: [ [tag, fasta(.gz) ] ]
+ // Permissible tags: tag, tag_1, tag_tag2_3, tag_tag2_tag3;
+ // Any name with alphanumeric characters including "_".
+ // "." is not allowed in the tag name
- // TE libs (fasta, fasta.gz) for target_assemblies
- //
- // Optional Set to [] if libraries are not available, te_libraries = []
+ te_libraries = [
+ ["donghong", "/workspace/pangene/test_data/donghong.TElib.fa.gz"]
+ ]
+ // Pattern: [ [tag, fasta(.gz) ] ]
+ // Optional: Set to null if libraries are not available.
//
// Each TE library should have an associated (by tag) assembly in target_assemblies.
// Not all target_assemblies need to have an associated (by tag) TE library.
// When the TE lib is not available for a traget assembly, EDTA is used to create one.
-
- te_libraries = []
-
- edta {
- is_sensitive = false
- save_outputs = true
- }
- repeatmasker {
- save_outputs = true
- }
-
- // Optional: Set to null if not available
- samplesheet = "./.test/samplesheet.csv"
- sample_prep {
- skip_fastqc = false
- skip_fastp = false
- min_trimmed_reads = 10000
- extra_fastp_args = ""
+ repeat_annotator = 'repeatmodeler'
+ // 'repeatmodeler' or 'edta'
- // toggling this parameter results in rerun of FASTP and FASTQC_TRIM
- save_trimmed = false
+ save_annotated_te_lib = true
- remove_ribo_rna = true
- save_non_ribo_reads = false
- ribo_database_manifest = "${projectDir}/assets/rrna-db-defaults.txt"
- }
+ edta_is_sensitive = false
- star_align {
- max_intron_length = 16000
- extra_star_align_args = ""
- save_outputs = false
- }
+ repeatmasker_save_outputs = true
+ samplesheet = "/workspace/pangene/test_data/samplesheet.csv"
// Optional: Set to null if not available
- external_protein_seqs = [
+
+ skip_fastqc = false
+ skip_fastp = false
+ min_trimmed_reads = 10000
+ extra_fastp_args = ""
+
+ save_trimmed = true
+ // toggling this parameter results in rerun of FASTP and FASTQC_TRIM
+
+ remove_ribo_rna = false
+ save_non_ribo_reads = true
+ ribo_database_manifest = "${projectDir}/assets/rrna-db-defaults.txt"
+
+ star_max_intron_length = 16000
+ star_align_extra_args = ""
+ star_save_outputs = true
+ save_cat_bam = true
+ // A single BAM is created for each assembly from all the RNAseq samples, if there
+ // are more than one
+
+ external_protein_fastas = [
"/workspace/ComparativeDataSources/OrthoDB11/Viridiplantae.fa.gz",
"/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.pep.fasta"
]
+ // Optional: Set to null if not available
+
+ braker_extra_args = ""
+
+ liftoff_xref_annotations = [
+ [
+ "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/Russell_V2a.chromosomes.and.unassiged.and.haplotigs.fsa",
+ "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.gff3"
+ ],
+ [
+ "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_chr_all.fas",
+ "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_GFF3_genes_transposons.fixed.gff3"
+ ]
+ ]
+ // Format: [ [ fasta(.gz), gff3(.gz) ] ]
+ // Optional: Set to null if not available
- braker {
- extra_braker_args = ""
- }
+ liftoff_coverage = 0.9
+ liftoff_identity = 0.9
- outdir = "./results"
+ outdir = "./results"
- max_cpus = 12
- max_memory = 200.GB
- max_time = 1.days
+ max_cpus = 12
+ max_memory = 200.GB
+ max_time = 1.days
}
+includeConfig './conf/manifest.config'
includeConfig './conf/modules.config'
-includeConfig './conf/reporting_defaults.config'
\ No newline at end of file
+includeConfig './conf/reporting_defaults.config'
diff --git a/pan_gene_pfr.sh b/pan_gene_pfr.sh
deleted file mode 100644
index a1b1ced..0000000
--- a/pan_gene_pfr.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash -e
-
-
-#SBATCH --job-name PAN_GENE
-#SBATCH --time=1-00:00:00
-#SBATCH --nodes=1
-#SBATCH --ntasks=1
-#SBATCH --cpus-per-task=2
-#SBATCH --output pan_gene_pfr.stdout
-#SBATCH --error pan_gene_pfr.stderr
-#SBATCH --mem=4G
-
-ml apptainer/1.1
-ml nextflow/22.10.4
-
-export TMPDIR="/workspace/$USER/tmp"
-
-nextflow main.nf -profile slurm -resume
\ No newline at end of file
diff --git a/pangene_local b/pangene_local
new file mode 100755
index 0000000..255edb9
--- /dev/null
+++ b/pangene_local
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+NO_FORMAT="\033[0m"
+C_RED="\033[38;5;9m"
+F_BOLD="\033[1m"
+
+[[ $1 == '-stub' ]] \
+ && stub='-stub' \
+ || stub=''
+
+[[ $1 == '-stub' ]] \
+ && echo 'Executing with -stub' \
+ || echo -e "${C_RED}${F_BOLD}Executing without -stub${NO_FORMAT}"
+
+nextflow \
+ main.nf \
+ -profile local,docker \
+ -resume \
+ $stub \
+ -params-file conf/test_params.json
diff --git a/pangene_pfr b/pangene_pfr
new file mode 100644
index 0000000..608798c
--- /dev/null
+++ b/pangene_pfr
@@ -0,0 +1,22 @@
+#!/bin/bash -e
+
+# SLURM batch script: loads apptainer and nextflow, then runs main.nf with the pfr,apptainer profiles.
+#SBATCH --job-name PANGENE
+#SBATCH --time=1-00:00:00
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=1
+#SBATCH --output pangene_pfr.stdout
+#SBATCH --error pangene_pfr.stderr
+#SBATCH --mem=4G
+
+ml apptainer/1.1
+ml nextflow/23.04.4
+# Existing bind paths are prepended only when set, so the list never starts with an empty ',' entry.
+export TMPDIR="/workspace/$USER/tmp"
+export APPTAINER_BINDPATH="${APPTAINER_BINDPATH:+$APPTAINER_BINDPATH,}$TMPDIR:$TMPDIR,$TMPDIR:/tmp"
+
+nextflow \
+    main.nf \
+    -profile pfr,apptainer \
+    -resume
diff --git a/subworkflows/local/align_rnaseq.nf b/subworkflows/local/align_rnaseq.nf
new file mode 100644
index 0000000..05cd2fa
--- /dev/null
+++ b/subworkflows/local/align_rnaseq.nf
@@ -0,0 +1,76 @@
+include { STAR_ALIGN } from '../../modules/nf-core/star/align'
+include { SAMTOOLS_CAT } from '../../modules/nf-core/samtools/cat'
+
+workflow ALIGN_RNASEQ { // Aligns trimmed reads to each targeted assembly with STAR_ALIGN and merges the BAMs per assembly
+ take:
+ reads_target // channel: [ meta, assembly_id ]
+ trim_reads // channel: [ meta, [ fq ] ]
+ assembly_index // channel: [ meta2, star_index ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ // MODULE: STAR_ALIGN
+ ch_star_inputs = reads_target
+ | combine(trim_reads, by:0) // attach the reads to every (sample meta, assembly) entry
+ | map { meta, assembly, fastq ->
+ [
+ assembly, // placed first so it can act as the key for the combine with the index below
+ [
+ id: "${meta.id}.on.${assembly}", // one id per sample/assembly combination
+ single_end: meta.single_end,
+ target_assembly: assembly
+ ],
+ fastq
+ ]
+ }
+ | combine(
+ assembly_index.map { meta, index -> [ meta.id, index ] }, // key the index channel by assembly id
+ by:0
+ )
+ | map { assembly, meta, fastq, index -> [ meta, fastq, index ] } // drop the key; it is retained in meta.target_assembly
+
+ def star_ignore_sjdbgtf = true
+ def seq_platform = false
+ def seq_center = false
+
+ STAR_ALIGN(
+ ch_star_inputs.map { meta, fastq, index -> [ meta, fastq ] },
+ ch_star_inputs.map { meta, fastq, index -> [ [ id: meta.target_assembly ], index ] }, // index with a meta holding only the assembly id
+ ch_star_inputs.map { meta, fastq, index -> [ [ id: meta.target_assembly ], [] ] }, // empty file list; presumably the GTF input — confirm against the module
+ star_ignore_sjdbgtf,
+ seq_platform,
+ seq_center
+ )
+
+ ch_star_bam = STAR_ALIGN.out.bam_sorted
+ ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first())
+
+ // MODULE: SAMTOOLS_CAT
+ ch_star_bam_branch = ch_star_bam
+ | map { meta, bam ->
+ [
+ [ id: meta.target_assembly ], // re-key by target assembly only
+ bam instanceof List ? bam.find { it =~ /Aligned/ } : bam // from a list of files keep the first one matching /Aligned/
+ ]
+ }
+ | groupTuple // gather all BAMs belonging to one assembly
+ | branch { meta, bamList ->
+ bams: bamList.size() > 1 // several BAMs: sent to SAMTOOLS_CAT
+ bam: bamList.size() <= 1 // single BAM: passed through unchanged
+ }
+
+ SAMTOOLS_CAT ( ch_star_bam_branch.bams )
+
+ ch_samtools_bam = SAMTOOLS_CAT.out.bam
+ | map { meta, bam -> [meta, [bam]] } // wrap in a list to match the shape of the pass-through branch
+ | mix(
+ ch_star_bam_branch.bam
+ )
+
+ ch_versions = ch_versions.mix(SAMTOOLS_CAT.out.versions.first())
+
+ emit:
+ bam = ch_samtools_bam // channel: [ [ id: target_assembly ], [ bam ] ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/extract_samples.nf b/subworkflows/local/extract_samples.nf
index f3c1a15..947c0b7 100644
--- a/subworkflows/local/extract_samples.nf
+++ b/subworkflows/local/extract_samples.nf
@@ -1,15 +1,15 @@
-nextflow.enable.dsl=2
-
+// Source:
// https://github.com/nf-core/rnaseq
// MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
//
+// Check input samplesheet and get read channels
+//
// Changes:
// Added channel permissible_target_assemblies
// Changed file name from input_check.nf to extract_samples.nf
// Removed strandedness
-//
-// Check input samplesheet and get read channels
-//
+// Now emitting an extra channel 'assemblies' which indicates the
+// assemblies targeted by each read
include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
@@ -20,36 +20,52 @@ workflow EXTRACT_SAMPLES {
main:
SAMPLESHEET_CHECK ( samplesheet, permissible_target_assemblies )
- .csv
- .splitCsv ( header:true, sep:',' )
- .map { create_fastq_channel(it) }
- .set { reads }
+ .csv
+ | splitCsv ( header:true, sep:',' )
+ | combine ( samplesheet )
+ | map { row, sheet ->
+ create_fastq_channel(row, sheet.getParent())
+ }
+ | set { ch_reads }
+
+ reads = ch_reads.map { meta, fastq -> [[id:meta.id, single_end:meta.single_end], fastq]}
+
+ ch_reads
+ | flatMap { meta, fastq ->
+ meta.target_assemblies.collect { assembly -> [[id:meta.id, single_end:meta.single_end], assembly] }
+ }
+ | set { assemblies }
emit:
- reads // channel: [ val(meta), [ reads ] ]
- versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
+ reads // channel: [ val(meta), [ reads ] ]
+ assemblies // channel: [ val(meta), val(assembly) ]
+ versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
}
// Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
-def create_fastq_channel(LinkedHashMap row) {
+def create_fastq_channel(LinkedHashMap row, sheetPath) {
// create meta map
def meta = [:]
meta.id = row.sample
meta.single_end = row.single_end.toBoolean()
meta.target_assemblies = row.target_assemblies.split(";").sort()
+ def fq1 = row.fastq_1.startsWith("/") ? row.fastq_1 : "$sheetPath/${row.fastq_1}"
+ def fq2 = row.fastq_2.startsWith("/") ? row.fastq_2 : "$sheetPath/${row.fastq_2}"
+
// add path(s) of the fastq file(s) to the meta map
def fastq_meta = []
- if (!file(row.fastq_1).exists()) {
- exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}"
+ if (!file(fq1).exists()) {
+ exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${fq1}"
}
if (meta.single_end) {
- fastq_meta = [ meta, [ file(row.fastq_1) ] ]
+ fastq_meta = [ meta, [ file(fq1) ] ]
} else {
- if (!file(row.fastq_2).exists()) {
- exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
+ if (!file(fq2).exists()) {
+ exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${fq2}"
}
- fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
+ fastq_meta = [ meta, [ file(fq1), file(fq2) ] ]
}
+
return fastq_meta
-}
\ No newline at end of file
+}
diff --git a/subworkflows/local/fasta_liftoff.nf b/subworkflows/local/fasta_liftoff.nf
new file mode 100644
index 0000000..4c59ba3
--- /dev/null
+++ b/subworkflows/local/fasta_liftoff.nf
@@ -0,0 +1,98 @@
+include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip'
+include { GUNZIP as GUNZIP_GFF } from '../../modules/nf-core/gunzip'
+include { GFFREAD } from '../../modules/nf-core/gffread'
+include { LIFTOFF } from '../../modules/pfr/liftoff'
+
+workflow FASTA_LIFTOFF { // Runs LIFTOFF for every target assembly against every reference (fasta, gff3) pair
+ take:
+ target_assemby // Channel: [ meta, fasta ]
+ xref_fasta // Channel: [ meta2, fasta ]
+ xref_gff // Channel: [ meta2, gff3 ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ // MODULE: GUNZIP as GUNZIP_FASTA
+ ch_xref_fasta_branch = xref_fasta
+ | branch { meta, file ->
+ gz: "$file".endsWith(".gz") // compressed files go through GUNZIP_FASTA
+ rest: !"$file".endsWith(".gz") // everything else is used as is
+ }
+
+ GUNZIP_FASTA ( ch_xref_fasta_branch.gz )
+
+ ch_xref_gunzip_fasta = GUNZIP_FASTA.out.gunzip
+ | mix(
+ ch_xref_fasta_branch.rest
+ )
+
+ ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions.first())
+
+ // MODULE: GUNZIP as GUNZIP_GFF
+ ch_xref_gff_branch = xref_gff
+ | branch { meta, file ->
+ gz: "$file".endsWith(".gz") // compressed files go through GUNZIP_GFF
+ rest: !"$file".endsWith(".gz") // everything else is used as is
+ }
+
+ GUNZIP_GFF ( ch_xref_gff_branch.gz )
+
+ ch_xref_gunzip_gff = GUNZIP_GFF.out.gunzip
+ | mix(
+ ch_xref_gff_branch.rest
+ )
+
+ ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions.first())
+
+ // MODULE: GFFREAD
+ ch_gffread_inputs = ch_xref_gunzip_gff
+ | map { meta, gff ->
+ [ gff.getSimpleName(), meta, gff ]
+ } // For meta insertion later, remove when GFFREAD has meta
+
+ GFFREAD ( ch_gffread_inputs.map { name, meta, gff -> gff } ) // GFFREAD receives the bare gff path
+
+ ch_gffread_gff = GFFREAD.out.gffread_gff
+ | map { gff -> [ gff.getSimpleName(), gff ] }
+ | join(ch_gffread_inputs) // matched on simple file name; assumes GFFREAD keeps the input's simple name — confirm
+ | map { fid, gffread_gff, meta, gff -> [ meta, gffread_gff ] }
+ // meta insertion
+
+ ch_versions = ch_versions.mix(GFFREAD.out.versions.first())
+
+ // MODULE: LIFTOFF
+ ch_liftoff_inputs = target_assemby
+ | combine( // every target assembly x every reference pair
+ ch_xref_gunzip_fasta
+ | join( // pair each reference fasta with its gff3 via the shared meta
+ ch_gffread_gff
+ )
+ )
+ | map { meta, target_fa, ref_meta, ref_fa, ref_gff ->
+ [
+ [
+ id: "${meta.id}.from.${ref_meta.id}",
+ target_assemby: meta.id // kept so results can be regrouped per target below
+ ],
+ target_fa,
+ ref_fa,
+ ref_gff
+ ]
+ }
+
+ LIFTOFF(
+ ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> [ meta, target_fa ] },
+ ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> ref_fa },
+ ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> ref_gff }
+ )
+
+ ch_liftoff_gff3 = LIFTOFF.out.polished_gff3
+ | map { meta, gff -> [ [ id: meta.target_assemby ], gff ] } // re-key by target assembly id
+ | groupTuple // one entry per target with all of its lifted gff3 files
+
+ ch_versions = ch_versions.mix(LIFTOFF.out.versions.first())
+
+ emit:
+ gff3 = ch_liftoff_gff3 // [ meta, [ gff3 ] ]
+ versions = ch_versions // [ versions.yml ]
+}
diff --git a/subworkflows/local/perform_edta_annotation.nf b/subworkflows/local/perform_edta_annotation.nf
deleted file mode 100644
index d362934..0000000
--- a/subworkflows/local/perform_edta_annotation.nf
+++ /dev/null
@@ -1,48 +0,0 @@
-nextflow.enable.dsl=2
-
-include { SHORTEN_EDTA_IDS } from '../../modules/local/edta/shorten_edta_ids'
-include { EDTA } from '../../modules/local/edta/edta'
-include { RESTORE_EDTA_IDS } from '../../modules/local/edta/restore_edta_ids'
-
-// https://github.com/Plant-Food-Research-Open/assembly_qc
-// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE
-workflow PERFORM_EDTA_ANNOTATION {
- take:
- genome_fasta // [meta, /path/to/genome/fasta]
-
- main:
-
- SHORTEN_EDTA_IDS(genome_fasta)
- .renamed_ids_fasta
- | EDTA
-
- RESTORE_EDTA_IDS(
- EDTA.out.te_lib_fasta,
- EDTA.out.intact_gff3.map { it[1] },
- EDTA.out.pass_list.map { it[1] },
- EDTA.out.out_file.map { it[1] },
- EDTA.out.te_anno_gff3.map { it[1] },
- SHORTEN_EDTA_IDS.out.renamed_ids_tsv.map { it[1] }
- )
-
- Channel.empty()
- | mix(
- SHORTEN_EDTA_IDS.out.versions.first()
- )
- | mix(
- EDTA.out.versions.first()
- )
- | mix(
- RESTORE_EDTA_IDS.out.versions.first()
- )
- | set { ch_versions }
-
- emit:
- te_lib_fasta = RESTORE_EDTA_IDS.out.te_lib_fasta
- intact_gff3 = RESTORE_EDTA_IDS.out.intact_gff3
- pass_list = RESTORE_EDTA_IDS.out.pass_list
- out_file = RESTORE_EDTA_IDS.out.out_file
- te_anno_gff3 = RESTORE_EDTA_IDS.out.te_anno_gff3
- renamed_ids_tsv = RESTORE_EDTA_IDS.out.renamed_ids_tsv
- versions = ch_versions
-}
\ No newline at end of file
diff --git a/subworkflows/local/prepare_assembly.nf b/subworkflows/local/prepare_assembly.nf
new file mode 100644
index 0000000..d18f5ce
--- /dev/null
+++ b/subworkflows/local/prepare_assembly.nf
@@ -0,0 +1,129 @@
+include { GUNZIP as GUNZIP_TARGET_ASSEMBLY } from '../../modules/nf-core/gunzip'
+include { GUNZIP as GUNZIP_TE_LIBRARY } from '../../modules/nf-core/gunzip'
+include { FASTAVALIDATOR } from '../../modules/nf-core/fastavalidator'
+include { REPEATMODELER_BUILDDATABASE } from '../../modules/pfr/repeatmodeler/builddatabase'
+include { REPEATMODELER_REPEATMODELER } from '../../modules/pfr/repeatmodeler/repeatmodeler'
+include { REPEATMASKER } from '../../modules/kherronism/repeatmasker'
+include { STAR_GENOMEGENERATE } from '../../modules/nf-core/star/genomegenerate'
+
+include { FASTA_EDTA_LAI } from '../../subworkflows/pfr/fasta_edta_lai'
+
+workflow PREPARE_ASSEMBLY { // Validates target assemblies, obtains a TE library for each, runs REPEATMASKER and builds STAR indices
+ take:
+ target_assembly // channel: [ meta, fasta ]
+ te_library // channel: [ meta, fasta ]
+ repeat_annotator // val(String), 'repeatmodeler' or 'edta'
+
+ main:
+ ch_versions = Channel.empty()
+
+ // MODULE: GUNZIP_TARGET_ASSEMBLY
+ target_assembly_branch = target_assembly
+ | branch { meta, file ->
+ gz: "$file".endsWith(".gz") // compressed files go through GUNZIP_TARGET_ASSEMBLY
+ rest: !"$file".endsWith(".gz") // everything else is used as is
+ }
+
+ GUNZIP_TARGET_ASSEMBLY ( target_assembly_branch.gz )
+
+ ch_gunzip_assembly = GUNZIP_TARGET_ASSEMBLY.out.gunzip
+ | mix(
+ target_assembly_branch.rest
+ )
+ ch_versions = ch_versions.mix(GUNZIP_TARGET_ASSEMBLY.out.versions.first())
+
+
+ // MODULE: FASTAVALIDATOR
+ FASTAVALIDATOR ( ch_gunzip_assembly )
+
+ ch_validated_assembly = ch_gunzip_assembly
+ | join(FASTAVALIDATOR.out.success_log) // only assemblies that produced a success log are kept
+ | map { meta, fasta, log -> [ meta, fasta ] }
+ ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions.first())
+
+ FASTAVALIDATOR.out.error_log
+ | map { meta, log ->
+ System.err.println("WARNING: FASTAVALIDATOR failed for ${meta.id} with error: ${log}. ${meta.id} is excluded from further analysis.")
+ }
+
+ // MODULE: GUNZIP_TE_LIBRARY
+ ch_te_library_branch = te_library
+ | branch { meta, file ->
+ gz: "$file".endsWith(".gz") // compressed files go through GUNZIP_TE_LIBRARY
+ rest: !"$file".endsWith(".gz") // everything else is used as is
+ }
+
+ GUNZIP_TE_LIBRARY ( ch_te_library_branch.gz )
+
+ ch_gunzip_te_library = GUNZIP_TE_LIBRARY.out.gunzip
+ | mix(
+ ch_te_library_branch.rest
+ )
+ ch_versions = ch_versions.mix(GUNZIP_TE_LIBRARY.out.versions.first())
+
+ // SUBWORKFLOW: FASTA_EDTA_LAI
+ ch_annotator_inputs = ch_validated_assembly
+ | join(
+ ch_gunzip_te_library, remainder: true // remainder keeps assemblies that have no matching TE library
+ )
+ | filter { meta, assembly, teLib ->
+ teLib == null // keep only assemblies without a supplied TE library
+ }
+ | map { meta, assembly, teLib -> [meta, assembly] }
+
+ ch_edta_inputs = repeat_annotator != 'edta'
+ ? Channel.empty() // EDTA receives nothing unless it is the selected annotator
+ : ch_annotator_inputs
+
+ FASTA_EDTA_LAI(
+ ch_edta_inputs,
+ [], // no monoploid sequence lists
+ true // Skip LAI
+ )
+
+ ch_versions = ch_versions.mix(FASTA_EDTA_LAI.out.versions.first())
+
+ // MODULE: REPEATMODELER_BUILDDATABASE
+ ch_repeatmodeler_inputs = repeat_annotator != 'repeatmodeler'
+ ? Channel.empty() // RepeatModeler receives nothing unless it is the selected annotator
+ : ch_annotator_inputs
+
+ REPEATMODELER_BUILDDATABASE ( ch_repeatmodeler_inputs )
+
+ ch_versions = ch_versions.mix(REPEATMODELER_BUILDDATABASE.out.versions.first())
+
+ // MODULE: REPEATMODELER_REPEATMODELER
+ REPEATMODELER_REPEATMODELER ( REPEATMODELER_BUILDDATABASE.out.db )
+
+ ch_assembly_and_te_lib = ch_validated_assembly
+ | join(
+ repeat_annotator == 'edta' // generated libraries are mixed with the supplied ones
+ ? FASTA_EDTA_LAI.out.te_lib_fasta.mix(ch_gunzip_te_library)
+ : REPEATMODELER_REPEATMODELER.out.fasta.mix(ch_gunzip_te_library)
+ )
+
+ ch_versions = ch_versions.mix(REPEATMODELER_REPEATMODELER.out.versions.first())
+
+ // MODULE: REPEATMASKER
+ REPEATMASKER(
+ ch_assembly_and_te_lib.map { meta, assembly, teLib -> [meta, assembly] },
+ ch_assembly_and_te_lib.map { meta, assembly, teLib -> teLib },
+ )
+
+ ch_versions = ch_versions.mix(REPEATMASKER.out.versions.first())
+
+ // MODULE: STAR_GENOMEGENERATE
+ STAR_GENOMEGENERATE(
+ ch_validated_assembly,
+ ch_validated_assembly.map { meta, fasta -> [ [], [] ] } // empty placeholder tuple; presumably the GTF input — confirm against the module
+ )
+
+ ch_assembly_index = STAR_GENOMEGENERATE.out.index
+ ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions.first())
+
+ emit:
+ target_assemby = ch_validated_assembly // channel: [ meta, fasta ]
+ masked_target_assembly = REPEATMASKER.out.fasta_masked // channel: [ meta, fasta ]
+ target_assemby_index = ch_assembly_index // channel: [ meta, star_index ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/prepare_ext_prots.nf b/subworkflows/local/prepare_ext_prots.nf
new file mode 100644
index 0000000..ee65f77
--- /dev/null
+++ b/subworkflows/local/prepare_ext_prots.nf
@@ -0,0 +1,35 @@
+include { GUNZIP } from '../../modules/nf-core/gunzip'
+include { CAT_CAT as CAT_PROTEIN_FASTAS } from '../../modules/nf-core/cat/cat'
+
+workflow PREPARE_EXT_PROTS { // Decompresses external protein fastas where needed and concatenates them with CAT_PROTEIN_FASTAS
+ take:
+ ch_ext_prot_fastas // Channel: [ meta, fasta ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ // MODULE: GUNZIP
+ ch_ext_prot_seqs_branch = ch_ext_prot_fastas
+ | branch { meta, file ->
+ gz: "$file".endsWith(".gz") // compressed files go through GUNZIP
+ rest: !"$file".endsWith(".gz") // everything else is used as is
+ }
+
+ GUNZIP ( ch_ext_prot_seqs_branch.gz )
+
+ ch_ext_prot_gunzip_fastas = GUNZIP.out.gunzip.mix(ch_ext_prot_seqs_branch.rest)
+ | map { meta, filePath -> filePath } // drop meta, keep the file path
+ | collect // gather every fasta into a single list
+ | map { fileList -> [ [ id: "ext_protein_seqs" ], fileList ] } // one item with a fixed id for CAT_PROTEIN_FASTAS
+
+ ch_versions = ch_versions.mix(GUNZIP.out.versions.first())
+
+ // MODULE: CAT_CAT as CAT_PROTEIN_FASTAS
+ CAT_PROTEIN_FASTAS ( ch_ext_prot_gunzip_fastas )
+
+ ch_versions = ch_versions.mix(CAT_PROTEIN_FASTAS.out.versions)
+
+ emit:
+ ext_prots_fasta = CAT_PROTEIN_FASTAS.out.file_out // Channel: [ meta, fasta ]
+ versions = ch_versions // Channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf
new file mode 100644
index 0000000..9466104
--- /dev/null
+++ b/subworkflows/local/preprocess_rnaseq.nf
@@ -0,0 +1,101 @@
+include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq'
+include { SORTMERNA } from '../../modules/nf-core/sortmerna'
+include { EXTRACT_SAMPLES } from '../../subworkflows/local/extract_samples'
+include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
+
+workflow PREPROCESS_RNASEQ { // Reads the samplesheet, merges fastq files per sample, trims them and optionally runs SORTMERNA
+    take:
+    samplesheet             // path: csv
+    permissible_assemblies  // val: assembly_a,assembly_b
+    skip_fastqc             // val: true|false
+    skip_fastp              // val: true|false
+    save_trimmed            // val: true|false
+    min_trimmed_reads       // val: Integer
+    remove_ribo_rna         // val: true|false
+    sortmerna_fastas        // channel: [ [ fasta ] ]
+
+    main:
+    ch_versions = Channel.empty()
+
+    // SUBWORKFLOW: EXTRACT_SAMPLES
+    EXTRACT_SAMPLES(
+        samplesheet,
+        permissible_assemblies
+    )
+
+    ch_fastq = EXTRACT_SAMPLES.out.reads
+        | map { meta, fastq ->
+            def groupID = meta.id - ~/_T\d+/ // 'def' keeps this closure-local; strips the first '_T<digits>' match from the id
+            [ meta + [id: groupID], fastq ]
+        }
+        | groupTuple() // entries sharing the stripped id end up in one group
+        | branch { meta, fastq ->
+            single  : fastq.size() == 1
+                return [ meta, fastq.flatten() ]
+            multiple: fastq.size() > 1
+                return [ meta, fastq.flatten() ]
+        }
+
+    ch_reads_target = EXTRACT_SAMPLES.out.assemblies
+        | map { meta, assembly ->
+            def groupID = meta.id - ~/_T\d+/ // same id normalisation as for ch_fastq, so both channels share keys
+            [ meta + [id: groupID], assembly ]
+        }
+        | unique // identical (meta, assembly) pairs collapse to one entry
+
+    ch_versions = ch_versions.mix(EXTRACT_SAMPLES.out.versions)
+
+    // MODULES: CAT_FASTQ
+    CAT_FASTQ ( ch_fastq.multiple )
+
+    ch_cat_fastq = CAT_FASTQ.out.reads.mix(ch_fastq.single)
+    ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first())
+
+    // SUBWORKFLOW: FASTQ_FASTQC_UMITOOLS_FASTP
+    def with_umi = false
+    def skip_umi_extract = true
+    def umi_discard_read = false
+
+    FASTQ_FASTQC_UMITOOLS_FASTP (
+        ch_cat_fastq,
+        skip_fastqc,
+        with_umi,
+        skip_umi_extract,
+        umi_discard_read,
+        skip_fastp,
+        [],           // adapter_fasta: none supplied
+        save_trimmed, // passed as save_trimmed_fail
+        save_trimmed, // passed as save_merged
+        min_trimmed_reads
+    )
+
+    ch_trim_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
+
+    ch_cat_fastq
+        | join(ch_trim_reads, remainder:true) // remainder keeps samples that have no trimmed output
+        | map { meta, reads, trimmed ->
+            if (!trimmed) {
+                System.err.println("WARNING: Dropping ${reads.collect { it.getName() }} as read count after trimming is less than $min_trimmed_reads")
+            }
+        }
+
+    ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions.first())
+
+    // MODULE: SORTMERNA
+    SORTMERNA(
+        remove_ribo_rna ? ch_trim_reads : Channel.empty(), // SORTMERNA receives nothing when rRNA removal is off
+        sortmerna_fastas
+    )
+
+    ch_emitted_reads = remove_ribo_rna
+        ? SORTMERNA.out.reads
+        : ch_trim_reads
+    ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
+
+
+
+    emit:
+    trim_reads = ch_emitted_reads   // channel: [ meta, [ fq ] ]
+    reads_target = ch_reads_target  // channel: [ meta, assembly_id ]
+    versions = ch_versions          // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
index 63a6592..2c67b3c 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
@@ -12,7 +12,10 @@ include { FASTP } from '../../../modules/nf-core/fastp/main'
//
import groovy.json.JsonSlurper
-def getFastpReadsAfterFiltering(json_file) {
+def getFastpReadsAfterFiltering(json_file, min_trimmed_reads) {
+
+ if (!json_file.text) { return min_trimmed_reads } // Usman Rashid: To allow -stub with FASTP
+
def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary')
return json['after_filtering']['total_reads'].toLong()
}
@@ -96,8 +99,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP {
.out
.reads
.join(trim_json)
- // Change: Bypassing getFastpReadsAfterFiltering when FASTP stub returns empty json
- .map { meta, reads, json -> [ meta, reads, json.text ? getFastpReadsAfterFiltering(json) : min_trimmed_reads.toLong()] }
+ .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json, min_trimmed_reads) ] }
.set { ch_num_trimmed_reads }
ch_num_trimmed_reads
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
index eafb0dc..220e8db 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
@@ -69,8 +69,10 @@ output:
- reads:
type: file
description: >
- Extracted FASTQ files. |
- For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. |
+ Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. |
+
+
+
For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
pattern: "*.{fastq.gz}"
- fastqc_html:
@@ -122,4 +124,5 @@ output:
pattern: "versions.yml"
authors:
- "@robsyme"
- - "@gallvp"
+maintainers:
+ - "@robsyme"
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
new file mode 100644
index 0000000..cdd7398
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
@@ -0,0 +1,60 @@
+nextflow_workflow { // nf-test suite for the FASTQ_FASTQC_UMITOOLS_FASTP subworkflow defined in ../main.nf
+
+ name "Test Workflow FASTQ_FASTQC_UMITOOLS_FASTP"
+ script "../main.nf"
+ workflow "FASTQ_FASTQC_UMITOOLS_FASTP"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "subworkflows/fastq_fastqc_umitools_fastp"
+ tag "fastq_fastqc_umitools_fastp"
+ tag "fastqc"
+ tag "umitools/extract"
+ tag "fastp"
+
+
+ test("sarscov2 paired-end [fastq]") { // paired-end input with FastQC and trimming enabled, UMI handling off
+
+ when {
+ workflow {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ input[1] = false // skip_fastqc
+ input[2] = false // with_umi
+ input[3] = false // skip_umi_extract
+ input[4] = 1 // umi_discard_read
+ input[5] = false // skip_trimming
+ input[6] = [] // adapter_fasta
+ input[7] = false // save_trimmed_fail
+ input[8] = false // save_merged
+ input[9] = 1 // min_trimmed_reads
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out.reads).match("reads") }, // these outputs are compared against the stored snapshot
+ { assert snapshot(workflow.out.umi_log).match("umi_log") },
+ { assert snapshot(workflow.out.trim_json).match("trim_json") },
+ { assert snapshot(workflow.out.trim_reads_fail).match("trim_reads_fail") },
+ { assert snapshot(workflow.out.trim_reads_merged).match("trim_reads_merged") },
+ { assert snapshot(workflow.out.trim_read_count).match("trim_read_count") },
+ { assert snapshot(workflow.out.versions).match("versions") },
+
+ { assert workflow.out.fastqc_raw_html }, // the remaining outputs are only checked for being truthy (non-empty)
+ { assert workflow.out.fastqc_raw_zip },
+ { assert workflow.out.trim_html },
+ { assert workflow.out.trim_log },
+ { assert workflow.out.fastqc_trim_html },
+ { assert workflow.out.fastqc_trim_zip }
+ )
+ }
+ }
+}
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
new file mode 100644
index 0000000..38a65ae
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
@@ -0,0 +1,81 @@
+{
+ "trim_reads_merged": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-26T02:28:26.26920982"
+ },
+ "trim_reads_fail": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-26T02:28:26.25861515"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad",
+ "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0",
+ "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
+ ]
+ ],
+ "timestamp": "2023-11-26T02:28:26.30891403"
+ },
+ "trim_json": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd"
+ ]
+ ]
+ ],
+ "timestamp": "2023-11-26T02:28:26.24768259"
+ },
+ "reads": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7",
+ "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39"
+ ]
+ ]
+ ]
+ ],
+ "timestamp": "2023-12-04T11:30:32.061644815"
+ },
+ "umi_log": {
+ "content": [
+ [
+
+ ]
+ ],
+ "timestamp": "2023-11-26T02:28:26.238536"
+ },
+ "trim_read_count": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ 198
+ ]
+ ]
+ ],
+ "timestamp": "2023-11-26T02:28:26.27984169"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml
new file mode 100644
index 0000000..84a4b56
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/fastq_fastqc_umitools_fastp:
+ - subworkflows/nf-core/fastq_fastqc_umitools_fastp/**
diff --git a/subworkflows/pfr/fasta_edta_lai/main.nf b/subworkflows/pfr/fasta_edta_lai/main.nf
new file mode 100644
index 0000000..2e73ca5
--- /dev/null
+++ b/subworkflows/pfr/fasta_edta_lai/main.nf
@@ -0,0 +1,88 @@
+include { CUSTOM_SHORTENFASTAIDS } from '../../../modules/pfr/custom/shortenfastaids'
+include { EDTA_EDTA } from '../../../modules/pfr/edta/edta'
+include { LAI } from '../../../modules/pfr/lai'
+include { CUSTOM_RESTOREGFFIDS } from '../../../modules/pfr/custom/restoregffids'
+
+workflow FASTA_EDTA_LAI { // Shortens fasta ids, runs EDTA_EDTA, optionally runs LAI, and restores the ids in the TE annotation gff3
+
+ take:
+ ch_fasta // channel: [ val(meta), fasta ]
+ ch_monoploid_seqs // channel: [ val(meta), txt ]; Optional: Set to [] if not needed
+ skip_lai // val; true|false
+
+ main:
+
+ ch_versions = Channel.empty()
+
+ // MODULE: CUSTOM_SHORTENFASTAIDS
+ CUSTOM_SHORTENFASTAIDS ( ch_fasta )
+
+ ch_short_ids_fasta = ch_fasta
+ | join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true) // remainder keeps fastas with no shortened output
+ | map { meta, fasta, short_ids_fasta ->
+ [ meta, short_ids_fasta ?: fasta ] // fall back to the original fasta
+ }
+
+ ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv
+ ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first())
+
+ // MODULE: EDTA_EDTA
+ EDTA_EDTA (
+ ch_short_ids_fasta,
+ [], // the four remaining EDTA_EDTA inputs are passed as empty lists
+ [],
+ [],
+ []
+ )
+
+ ch_te_lib_fasta = EDTA_EDTA.out.te_lib_fasta
+ ch_pass_list = EDTA_EDTA.out.pass_list
+ ch_out_file = EDTA_EDTA.out.out_file
+ ch_te_anno_gff3 = EDTA_EDTA.out.te_anno_gff3
+ ch_versions = ch_versions.mix(EDTA_EDTA.out.versions.first())
+
+ // MODULE: LAI
+ ch_lai_inputs = skip_lai
+ ? Channel.empty() // LAI receives nothing when skip_lai is true
+ : ch_short_ids_fasta
+ | join(ch_pass_list)
+ | join(ch_out_file)
+ | join(
+ ch_monoploid_seqs ?: Channel.empty(), // a falsy value such as [] is replaced by an empty channel
+ by:0,
+ remainder: true
+ )
+ | map { meta, fasta, pass, out, mono ->
+ [ meta, fasta, pass, out, mono ?: [] ] // a missing monoploid list becomes []
+ }
+ LAI (
+ ch_lai_inputs.map { meta, fasta, pass, out, mono -> [ meta, fasta ] },
+ ch_lai_inputs.map { meta, fasta, pass, out, mono -> pass },
+ ch_lai_inputs.map { meta, fasta, pass, out, mono -> out },
+ ch_lai_inputs.map { meta, fasta, pass, out, mono -> mono }
+ )
+
+ ch_lai_log = LAI.out.log
+ ch_lai_out = LAI.out.lai_out
+ ch_versions = ch_versions.mix(LAI.out.versions.first())
+
+ // MODULE: CUSTOM_RESTOREGFFIDS
+ ch_restorable_gff_tsv = ch_te_anno_gff3.join(ch_short_ids_tsv) // only gff3 files that have an id-mapping tsv
+
+ CUSTOM_RESTOREGFFIDS (
+ ch_restorable_gff_tsv.map { meta, gff, tsv -> [ meta, gff ] },
+ ch_restorable_gff_tsv.map { meta, gff, tsv -> tsv }
+ )
+
+ ch_restored_gff = ch_te_anno_gff3
+ | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true)
+ | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] } // fall back to the unrestored gff
+ ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first())
+
+ emit:
+ te_lib_fasta = ch_te_lib_fasta // channel: [ val(meta), fasta ]
+ te_anno_gff3 = ch_restored_gff // channel: [ val(meta), gff ]
+ lai_log = ch_lai_log // channel: [ val(meta), log ]
+ lai_out = ch_lai_out // channel: [ val(meta), out ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/pfr/fasta_edta_lai/meta.yml b/subworkflows/pfr/fasta_edta_lai/meta.yml
new file mode 100644
index 0000000..52483ce
--- /dev/null
+++ b/subworkflows/pfr/fasta_edta_lai/meta.yml
@@ -0,0 +1,69 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "fasta_edta_lai"
+description: |
+ Performs extensive de-novo transposable element annotation with EDTA and optionally estimates repeat-space completeness with LAI
+keywords:
+ - genomics
+ - genome
+ - annotation
+ - repeat
+ - transposons
+ - stats
+ - qc
+components:
+ - custom/restoregffids
+ - custom/shortenfastaids
+ - edta/edta
+ - lai
+input:
+ - ch_fasta:
+ type: file
+ description: |
+ Channel for the assembly fasta file
+ Structure: [ val(meta), path(fasta) ]
+ pattern: "*.{fsa,fa,fasta}"
+ - ch_monoploid_seqs:
+ type: file
+ description: |
+ Channel for providing a list of monoploid sequences
+ for correct estimation of LAI for polyploid genomes.
+ This parameter is useful when all the haplotypes are
+ stored in a single fasta file.
+ Structure: [ val(meta), path(txt) ]
+ pattern: "*.txt"
+ - skip_lai:
+ type: boolean
+ description: |
+ Skip LAI estimation
+ Structure: [ val(boolean) ]
+output:
+ - te_lib_fasta:
+ type: file
+ description: A non-redundant TE library in fasta format
+ pattern: "*.EDTA.TElib.fa"
+ - te_anno_gff3:
+ type: file
+ description: A gff3 file containing both structurally intact and fragmented TE annotations
+ pattern: "*.EDTA.TEanno.gff3"
+ - lai_log:
+ type: file
+ description: |
+ Log from LAI
+ Structure: [ val(meta), path(log) ]
+ pattern: "*.LAI.log"
+ - lai_out:
+ type: file
+ description: |
+ LAI output
+ Structure: [ val(meta), path(out) ]
+ pattern: "*.LAI.out"
+ - versions:
+ type: file
+ description: |
+ File containing software versions
+ Structure: [ path(versions.yml) ]
+ pattern: "versions.yml"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
new file mode 100644
index 0000000..e852a70
--- /dev/null
+++ b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
@@ -0,0 +1,54 @@
+nextflow_workflow {
+
+    name "Test Workflow FASTA_EDTA_LAI"
+    script "../main.nf"
+    workflow "FASTA_EDTA_LAI"
+
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/fasta_edta_lai"
+    tag "fasta_edta_lai"
+    tag "lai"
+    tag "edta/edta"
+    tag "custom/restoregffids"
+    tag "custom/shortenfastaids"
+
+    test("test_data") {
+
+        setup {
+            // Decompress the test genome so the workflow receives a plain fasta.
+            // The input is fetched from nf-core/test-datasets instead of a
+            // developer-local absolute path, so the test can run on any machine.
+            run("GUNZIP") {
+                script "../../../../modules/nf-core/gunzip"
+
+                process {
+                    """
+                    input[0] = [
+                        [ id:'test' ],
+                        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true)
+                    ]
+                    """
+                }
+            }
+        }
+
+        when {
+            workflow {
+                // Per meta.yml: input[0] = fasta channel, input[1] = monoploid seqs (none), input[2] = skip_lai
+                """
+                input[0] = GUNZIP.out.gunzip
+                input[1] = []
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(workflow.out.versions).match("versions") }
+            )
+        }
+    }
+}
diff --git a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap
new file mode 100644
index 0000000..574acc9
--- /dev/null
+++ b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap
@@ -0,0 +1,11 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,0d4bc49e94acb8995ca552d4e666e3ce",
+ "versions.yml:md5,754bb19f86be761d90c002a0af2faf1c"
+ ]
+ ],
+ "timestamp": "2023-12-22T14:09:24.171934"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/pfr/fasta_edta_lai/tests/tags.yml b/subworkflows/pfr/fasta_edta_lai/tests/tags.yml
new file mode 100644
index 0000000..b114c58
--- /dev/null
+++ b/subworkflows/pfr/fasta_edta_lai/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/fasta_edta_lai:
+ - subworkflows/pfr/fasta_edta_lai/**
diff --git a/workflows/pan_gene.nf b/workflows/pan_gene.nf
deleted file mode 100644
index fccbe2e..0000000
--- a/workflows/pan_gene.nf
+++ /dev/null
@@ -1,406 +0,0 @@
-nextflow.enable.dsl=2
-
-include { GUNZIP as GUNZIP_TARGET_ASSEMBLY } from '../modules/nf-core/gunzip'
-include { GUNZIP as GUNZIP_TE_LIBRARY } from '../modules/nf-core/gunzip'
-include { GUNZIP as GUNZIP_EXTERNAL_PROTEIN_SEQ } from '../modules/nf-core/gunzip'
-include { FASTA_VALIDATE } from '../modules/local/fasta_validate'
-include { REPEATMASKER } from '../modules/kherronism/repeatmasker'
-include { STAR_GENOMEGENERATE } from '../modules/nf-core/star/genomegenerate'
-include { CAT_FASTQ } from '../modules/nf-core/cat/fastq'
-include { SORTMERNA } from '../modules/nf-core/sortmerna'
-include { STAR_ALIGN } from '../modules/nf-core/star/align'
-include { SAMTOOLS_CAT } from '../modules/nf-core/samtools/cat'
-include { CAT_CAT as CAT_PROTEIN_SEQS } from '../modules/nf-core/cat/cat'
-include { BRAKER3 } from '../modules/kherronism/braker3'
-
-include { PERFORM_EDTA_ANNOTATION } from '../subworkflows/local/perform_edta_annotation'
-include { EXTRACT_SAMPLES } from '../subworkflows/local/extract_samples'
-include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
-
-include { validateParams } from '../modules/local/validate_params'
-
-validateParams(params)
-
-// Additional validation
-// Check rRNA databases for sortmerna
-if (params.sample_prep.remove_ribo_rna) {
- ch_ribo_db = file(params.sample_prep.ribo_database_manifest, checkIfExists: true)
- if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"}
-}
-
-workflow PAN_GENE {
-
- // Versions
- Channel.empty()
- | set { ch_versions }
-
- // GUNZIP: target_assemblies
- Channel.fromList(params.target_assemblies)
- | map { tag, filePath ->
- [[id:tag], file(filePath, checkIfExists: true)]
- }
- | branch { meta, file ->
- gz: "$file".endsWith(".gz")
- rest: !"$file".endsWith(".gz")
- }
- | set { ch_target_assemblies }
-
- GUNZIP_TARGET_ASSEMBLY(
- ch_target_assemblies.gz
- )
- .gunzip
- | mix(
- ch_target_assemblies.rest
- )
- | set { ch_gunzip_target_assemblies }
-
- ch_versions
- | mix(GUNZIP_TARGET_ASSEMBLY.out.versions.first())
- | set { ch_versions }
-
- // FASTA_VALIDATE
- FASTA_VALIDATE(ch_gunzip_target_assemblies)
- .valid_fasta
- | set { ch_validated_target_assemblies }
-
- ch_versions
- | mix(FASTA_VALIDATE.out.versions.first())
- | set { ch_versions }
-
- // GUNZIP: te_libraries
- Channel.fromList(params.te_libraries)
- | map { tag, filePath ->
- [[id:tag], file(filePath, checkIfExists: true)]
- }
- | branch { meta, file ->
- gz: "$file".endsWith(".gz")
- rest: !"$file".endsWith(".gz")
- }
- | set { ch_te_libraries }
-
- GUNZIP_TE_LIBRARY(
- ch_te_libraries.gz
- )
- .gunzip
- | mix(
- ch_te_libraries.rest
- )
- | set { ch_gunzip_te_libraries }
-
- ch_versions
- | mix(GUNZIP_TE_LIBRARY.out.versions.first())
- | set { ch_versions }
-
- // PERFORM_EDTA_ANNOTATION
- ch_validated_target_assemblies
- | join(
- ch_gunzip_te_libraries, remainder: true
- )
- | filter { meta, assembly, teLib ->
- teLib == null
- }
- | map {meta, assembly, teLib -> [meta, assembly]}
- | PERFORM_EDTA_ANNOTATION
-
- ch_versions
- | mix(PERFORM_EDTA_ANNOTATION.out.versions)
- | set { ch_versions }
-
- // REPEATMASKER
- ch_validated_target_assemblies
- | join(
- PERFORM_EDTA_ANNOTATION.out.te_lib_fasta.mix(ch_gunzip_te_libraries)
- )
- | set { ch_assemblies_n_te_libs }
-
- REPEATMASKER(
- ch_assemblies_n_te_libs.map {meta, assembly, teLib -> [meta, assembly]},
- ch_assemblies_n_te_libs.map {meta, assembly, teLib -> teLib},
- )
-
- ch_versions
- | mix(REPEATMASKER.out.versions.first())
- | set { ch_versions }
-
- // STAR_GENOMEGENERATE
- def star_ignore_sjdbgtf = true
- STAR_GENOMEGENERATE(
- REPEATMASKER.out.fasta_masked,
- REPEATMASKER.out.fasta_masked.map{meta, maskedFasta -> [meta, []]},
- star_ignore_sjdbgtf
- )
- .index
- | set { ch_assembly_index }
-
- ch_versions
- | mix(STAR_GENOMEGENERATE.out.versions.first())
- | set { ch_versions }
-
- // EXTRACT_SAMPLES
- // https://github.com/nf-core/rnaseq
- // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
- // Changes
- // Use meta.id as key for groupTuple as groupTuple does not work when there is a sublist in the key list
- ch_samplesheet_path = Channel.empty()
- if(params.samplesheet != null) {
- ch_samplesheet_path = Channel.fromPath(params.samplesheet)
- }
-
- EXTRACT_SAMPLES(
- ch_samplesheet_path,
- Channel.of(params.target_assemblies.collect{tag, fastaPath -> tag.strip()}.join(","))
- )
- .reads
- | map { meta, fastq ->
- new_id = meta.id - ~/_T\d+/
- [ new_id, meta + [id: new_id], fastq ]
- }
- | groupTuple()
- | branch { meta_id, meta, fastq ->
- single : fastq.size() == 1
- return [ meta.first(), fastq.flatten() ]
- multiple: fastq.size() > 1
- return [ meta.first(), fastq.flatten() ]
- }
- | set { ch_fastq }
-
- ch_versions
- | mix(EXTRACT_SAMPLES.out.versions)
- | set { ch_versions }
-
- // CAT_FASTQ
- // https://github.com/nf-core/rnaseq
- // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
- CAT_FASTQ (
- ch_fastq.multiple
- )
- .reads
- | mix(ch_fastq.single)
- | set { ch_cat_fastq }
-
- ch_versions
- | mix(CAT_FASTQ.out.versions.first())
- | set { ch_versions }
-
- // FASTQ_FASTQC_UMITOOLS_FASTP
- // https://github.com/nf-core/rnaseq
- // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
- def with_umi = false
- def skip_umi_extract = true
- def umi_discard_read = false
- FASTQ_FASTQC_UMITOOLS_FASTP (
- ch_cat_fastq,
- params.sample_prep.skip_fastqc,
- with_umi,
- skip_umi_extract,
- umi_discard_read,
- params.sample_prep.skip_fastp,
- [],
- params.sample_prep.save_trimmed,
- params.sample_prep.save_trimmed,
- params.sample_prep.min_trimmed_reads
- )
- .reads
- | set { ch_trim_reads }
-
- // SORTMERNA
- if (params.sample_prep.remove_ribo_rna) {
- Channel.from(ch_ribo_db.readLines())
- | map { row -> file(row, checkIfExists: true) }
- | collect
- | set { ch_sortmerna_fastas }
-
- SORTMERNA (
- ch_trim_reads,
- ch_sortmerna_fastas
- )
- .reads
- | set { ch_trim_reads }
-
- ch_versions
- | mix(SORTMERNA.out.versions.first())
- | set { ch_versions }
- }
-
- ch_trim_reads
- | flatMap { meta, reads ->
- def targetAssemblies = meta["target_assemblies"]
-
- readsByAssembly = []
-
- for(assembly in targetAssemblies) {
- readsByAssembly += [[[id: "${meta.id}.on.${assembly}", single_end: meta.single_end, target_assembly: assembly], reads]]
- }
-
- return readsByAssembly
- }
- | set { ch_trim_reads_by_assembly }
-
- ch_versions
- | mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
- | set { ch_versions }
-
- // STAR_ALIGN
- ch_assembly_index
- | map { meta, index ->
- [meta.id, index]
- }
- | cross(
- ch_trim_reads_by_assembly.map{meta, reads -> [meta.target_assembly, meta, reads]}
- )
- | map { indexWithExt, readsWithExt ->
- def index = indexWithExt[1]
-
- def readsMeta = readsWithExt[1]
- def reads = readsWithExt[2]
-
- [
- readsMeta,
- reads,
- index
- ]
- }
- | set { ch_trim_reads_by_assembly_with_index }
-
- def seq_platform = false
- def seq_center = false
- STAR_ALIGN(
- ch_trim_reads_by_assembly_with_index.map{meta, reads, index -> [meta, reads]},
- ch_trim_reads_by_assembly_with_index.map{meta, reads, index -> [[id: meta.target_assembly], index]},
- ch_trim_reads_by_assembly_with_index.map{meta, reads, index -> [[id: meta.target_assembly], []]},
- star_ignore_sjdbgtf,
- seq_platform,
- seq_center
- )
- .bam_sorted
- .tap { ch_mapped_reads }
- .map { meta, bam ->
- [
- [id: meta.target_assembly],
- bam instanceof List ? bam.find {it =~ /Aligned/} : bam
- ]
- }
- | groupTuple
- | set { ch_mapped_reads_by_assembly }
-
- ch_versions
- | mix(STAR_ALIGN.out.versions.first())
- | set { ch_versions }
-
- // SAMTOOLS_CAT
- ch_mapped_reads_by_assembly
- | branch { meta, bamList ->
- bams: bamList.size() > 1
- bam: bamList.size() <= 1
- }
- | set { ch_samtools_cat_inputs_branches }
-
- SAMTOOLS_CAT(
- ch_samtools_cat_inputs_branches.bams
- )
- .bam
- | map { meta, bam ->
- [
- meta,
- [bam]
- ]
- }
- | mix(
- ch_samtools_cat_inputs_branches.bam
- )
- | set { ch_cat_bam_by_assembly }
-
- ch_versions
- | mix(SAMTOOLS_CAT.out.versions.first())
- | set { ch_versions }
-
- // GUNZIP: external_protein_seqs
- ch_external_protein_seqs = Channel.empty()
- if(params.external_protein_seqs != null) {
- ch_external_protein_seqs = Channel.fromList(params.external_protein_seqs)
- }
-
- ch_external_protein_seqs
- | map { filePath ->
- def fileHandle = file(filePath, checkIfExists: true)
- [[id:fileHandle.getSimpleName()], fileHandle]
- }
- | branch { meta, file ->
- gz: "$file".endsWith(".gz")
- rest: !"$file".endsWith(".gz")
- }
- | set { ch_external_protein_seqs_branch }
-
- GUNZIP_EXTERNAL_PROTEIN_SEQ(
- ch_external_protein_seqs_branch.gz
- )
- .gunzip
- | mix(
- ch_external_protein_seqs_branch.rest
- )
- | set { ch_gunzip_external_protein_seqs }
-
- ch_versions
- | mix(GUNZIP_EXTERNAL_PROTEIN_SEQ.out.versions.first())
- | set { ch_versions }
-
- // CAT_PROTEIN_SEQS
- ch_gunzip_external_protein_seqs
- | map{meta, filePath -> filePath}
- | collect
- | map{fileList -> [[id:"protein_seqs"], fileList]}
- | CAT_PROTEIN_SEQS
-
- CAT_PROTEIN_SEQS.out.file_out
- | set { ch_protein_seq }
-
- ch_versions
- | mix(CAT_PROTEIN_SEQS.out.versions)
- | set { ch_versions }
-
- // BRAKER3
- REPEATMASKER.out.fasta_masked
- | mix(ch_cat_bam_by_assembly)
- | groupTuple(size: 2, remainder: true)
- | map { meta, groupedItems ->
- def maskedFasta = groupedItems[0]
-
- if(groupedItems.size() == 2) {
- def bam = groupedItems[1]
- return [meta, maskedFasta, bam]
- } else {
- return [meta, maskedFasta, []]
- }
- }
- | set { ch_braker_inputs }
-
- if(params.external_protein_seqs) {
- ch_braker_inputs
- | combine(ch_protein_seq.map{meta, filePath -> filePath})
- | set { ch_braker_inputs }
- } else {
- ch_braker_inputs
- | map{meta, assembly, bams -> [meta, assembly, bams, []]}
- | set { ch_braker_inputs }
- }
-
- ch_fasta = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> [meta, assembly]}
- ch_bam = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> bams}
- ch_rnaseq_sets_dirs = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> []}
- ch_rnaseq_sets_ids = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> []}
- ch_proteins = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> proteinSeq}
- ch_hintsfile = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> []}
-
- BRAKER3(
- ch_fasta,
- ch_bam,
- ch_rnaseq_sets_dirs,
- ch_rnaseq_sets_ids,
- ch_proteins,
- ch_hintsfile
- )
-
- ch_versions
- | mix(BRAKER3.out.versions.first())
- | set { ch_versions }
-}
\ No newline at end of file
diff --git a/workflows/pangene.nf b/workflows/pangene.nf
new file mode 100644
index 0000000..8512ff9
--- /dev/null
+++ b/workflows/pangene.nf
@@ -0,0 +1,175 @@
+include { validateParams } from '../modules/local/validate_params'
+include { PREPARE_ASSEMBLY } from '../subworkflows/local/prepare_assembly'
+include { PREPROCESS_RNASEQ } from '../subworkflows/local/preprocess_rnaseq'
+include { ALIGN_RNASEQ } from '../subworkflows/local/align_rnaseq'
+include { PREPARE_EXT_PROTS } from '../subworkflows/local/prepare_ext_prots'
+include { BRAKER3 } from '../modules/kherronism/braker3'
+include { FASTA_LIFTOFF } from '../subworkflows/local/fasta_liftoff'
+include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions'
+
+validateParams(params)
+
+// PANGENE: builds the input channels from params, then calls PREPARE_ASSEMBLY,
+// PREPROCESS_RNASEQ, ALIGN_RNASEQ, PREPARE_EXT_PROTS, BRAKER3 and FASTA_LIFTOFF,
+// and finally hands the collected versions to CUSTOM_DUMPSOFTWAREVERSIONS.
+workflow PANGENE {
+
+    ch_versions = Channel.empty()
+
+    // One [ [ id ], file ] item per [ tag, path ] pair in params.target_assemblies
+    ch_target_assembly = Channel.fromList(params.target_assemblies)
+        | map { tag, filePath ->
+            [ [ id: tag ], file(filePath, checkIfExists: true) ]
+        }
+
+    // One [ [ id ], file ] item per [ tag, path ] pair in params.te_libraries
+    ch_te_library = Channel.fromList(params.te_libraries)
+        | map { tag, filePath ->
+            [ [ id:tag ], file(filePath, checkIfExists: true) ]
+        }
+
+    ch_samplesheet = params.samplesheet
+        ? Channel.fromPath(params.samplesheet, checkIfExists: true)
+        : Channel.empty()
+
+    // Single item: the stripped target assembly tags joined with commas
+    ch_tar_assm_str = Channel.of(
+        params.target_assemblies
+            .collect { tag, fastaPath -> tag.strip() }.join(",")
+    )
+
+    // Manifest file whose lines are read as rRNA database paths below; null when
+    // params.remove_ribo_rna is not set. This is a file object, not a channel.
+    ch_ribo_db = params.remove_ribo_rna
+        ? file(params.ribo_database_manifest, checkIfExists: true)
+        : null
+
+    // The outer ternary already guarantees ch_ribo_db is set, so the manifest
+    // lines are read directly. Channel.fromList replaces the deprecated
+    // Channel.from factory and emits the same items.
+    ch_sortmerna_fastas = ch_ribo_db
+        ? Channel.fromList(ch_ribo_db.readLines())
+            | map { row -> file(row, checkIfExists: true) }
+            | collect
+        : Channel.empty()
+
+    ch_ext_prot_fastas = params.external_protein_fastas
+        ? Channel.fromList(params.external_protein_fastas)
+            | map { filePath ->
+                def fileHandle = file(filePath, checkIfExists: true)
+                [ [ id: fileHandle.getSimpleName() ], fileHandle]
+            }
+        : Channel.empty()
+
+    // multiMap result exposing .fasta and .gff, or a plain empty channel when
+    // params.liftoff_xref_annotations is not set
+    ch_xref_mm = params.liftoff_xref_annotations
+        ? Channel.fromList(params.liftoff_xref_annotations)
+            | multiMap { fasta, gff ->
+                def fastaFile = file(fasta, checkIfExists:true)
+
+                fasta: [ [ id: fastaFile.getSimpleName() ], fastaFile ]
+                gff: [ [ id: fastaFile.getSimpleName() ], file(gff, checkIfExists:true) ]
+            }
+        : Channel.empty()
+
+    // Channel.empty() has no .fasta/.gff members, so the multiMap outputs are
+    // only dereferenced when the parameter was actually provided
+    ch_xref_fasta = params.liftoff_xref_annotations
+        ? ch_xref_mm.fasta
+        : Channel.empty()
+
+    ch_xref_gff = params.liftoff_xref_annotations
+        ? ch_xref_mm.gff
+        : Channel.empty()
+
+    // SUBWORKFLOW: PREPARE_ASSEMBLY
+    PREPARE_ASSEMBLY(
+        ch_target_assembly,
+        ch_te_library,
+        params.repeat_annotator
+    )
+
+    // NOTE(review): 'assemby' in the emit names below looks like a typo, but they
+    // must match whatever PREPARE_ASSEMBLY declares -- confirm before renaming
+    ch_valid_target_assembly = PREPARE_ASSEMBLY.out.target_assemby
+    ch_masked_target_assembly = PREPARE_ASSEMBLY.out.masked_target_assembly
+    ch_target_assemby_index = PREPARE_ASSEMBLY.out.target_assemby_index
+    ch_versions = ch_versions.mix(PREPARE_ASSEMBLY.out.versions)
+
+    // SUBWORKFLOW: PREPROCESS_RNASEQ
+    PREPROCESS_RNASEQ(
+        ch_samplesheet,
+        ch_tar_assm_str,
+        params.skip_fastqc,
+        params.skip_fastp,
+        params.save_trimmed,
+        params.min_trimmed_reads,
+        params.remove_ribo_rna,
+        ch_sortmerna_fastas
+    )
+
+    ch_trim_reads = PREPROCESS_RNASEQ.out.trim_reads
+    ch_reads_target = PREPROCESS_RNASEQ.out.reads_target
+    ch_versions = ch_versions.mix(PREPROCESS_RNASEQ.out.versions)
+
+    // SUBWORKFLOW: ALIGN_RNASEQ
+    ALIGN_RNASEQ(
+        ch_reads_target,
+        ch_trim_reads,
+        ch_target_assemby_index
+    )
+
+    ch_rnaseq_bam = ALIGN_RNASEQ.out.bam
+    ch_versions = ch_versions.mix(ALIGN_RNASEQ.out.versions)
+
+    // MODULE: PREPARE_EXT_PROTS
+    PREPARE_EXT_PROTS(
+        ch_ext_prot_fastas
+    )
+
+    ch_ext_prots_fasta = PREPARE_EXT_PROTS.out.ext_prots_fasta
+    ch_versions = ch_versions.mix(PREPARE_EXT_PROTS.out.versions)
+
+    // MODULE: BRAKER3
+    // join(remainder: true) keeps assemblies that have no matching bam (bam == null);
+    // ifEmpty(null) lets combine() still emit when ch_ext_prots_fasta is empty.
+    // Missing bam/prots are replaced by [] before being handed to BRAKER3.
+    ch_braker_inputs = ch_masked_target_assembly
+        | join(ch_rnaseq_bam, remainder: true)
+        | combine(
+            ch_ext_prots_fasta.map { meta, filePath -> filePath }.ifEmpty(null)
+        )
+        | map { meta, fasta, bam, prots -> [ meta, fasta, bam ?: [], prots ?: [] ] }
+
+    def rnaseq_sets_dirs = []
+    def rnaseq_sets_ids = []
+    def hintsfile = []
+
+    BRAKER3(
+        ch_braker_inputs.map { meta, fasta, bam, prots -> [meta, fasta] },
+        ch_braker_inputs.map { meta, fasta, bam, prots -> bam },
+        rnaseq_sets_dirs,
+        rnaseq_sets_ids,
+        ch_braker_inputs.map { meta, fasta, bam, prots -> prots },
+        hintsfile
+    )
+
+    ch_braker_gff3 = BRAKER3.out.gff3
+    ch_versions = ch_versions.mix(BRAKER3.out.versions.first())
+
+    // SUBWORKFLOW: FASTA_LIFTOFF
+    FASTA_LIFTOFF(
+        ch_valid_target_assembly,
+        ch_xref_fasta,
+        ch_xref_gff
+    )
+
+    ch_liftoff_gff3 = FASTA_LIFTOFF.out.gff3
+    ch_versions = ch_versions.mix(FASTA_LIFTOFF.out.versions)
+
+    // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS
+    CUSTOM_DUMPSOFTWAREVERSIONS (
+        ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    )
+}
|