From c8667c916e5a96c23f2333d3403389d1674f3774 Mon Sep 17 00:00:00 2001 From: liamlelievre Date: Wed, 4 Dec 2024 13:00:25 +1300 Subject: [PATCH 01/23] Add EXTRACT_CDS feature to GFF_STORE workflow --- conf/modules.config | 12 ++++++++++++ subworkflows/local/gff_store.nf | 13 +++++++++++++ 2 files changed, 25 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index ba2bb72..d68fac3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -295,6 +295,18 @@ process { // SUBWORKFLOW: GFF_STORE saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } + + withName: '.*:GFF_STORE:EXTRACT_CDS' { + ext.args = '-x' + ext.prefix = { "${meta.id}.cds" } + + publishDir = [ + path: { "${params.outdir}/annotations/$meta.id" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + } process { // SUBWORKFLOW: FASTA_ORTHOFINDER diff --git a/subworkflows/local/gff_store.nf b/subworkflows/local/gff_store.nf index 2c877a9..48e89c9 100644 --- a/subworkflows/local/gff_store.nf +++ b/subworkflows/local/gff_store.nf @@ -2,6 +2,7 @@ import java.net.URLEncoder include { GT_GFF3 as FINAL_GFF_CHECK } from '../../modules/nf-core/gt/gff3/main' include { GFFREAD as EXTRACT_PROTEINS } from '../../modules/nf-core/gffread/main' +include { GFFREAD as EXTRACT_CDS } from '../../modules/nf-core/gffread/main' workflow GFF_STORE { take: @@ -133,9 +134,21 @@ workflow GFF_STORE { ch_final_proteins = EXTRACT_PROTEINS.out.gffread_fasta ch_versions = ch_versions.mix(EXTRACT_PROTEINS.out.versions.first()) +// MODULE: GFFREAD as EXTRACT_CDS + ch_cds_extraction_inputs = ch_final_gff | join(ch_fasta) + + EXTRACT_CDS( + ch_cds_extraction_inputs.map { meta, gff, fasta -> [ meta, gff ] }, + ch_cds_extraction_inputs.map { meta, gff, fasta -> fasta }, + ext.args: '-w' // Extract CDS + ) + + ch_final_cds = EXTRACT_CDS.out.gffread_fasta + ch_versions = ch_versions.mix(EXTRACT_CDS.out.versions.first()) emit: 
final_gff = ch_final_gff // [ meta, gff ] final_proteins = ch_final_proteins // [ meta, fasta ] + final_cds = ch_final_cds // [ meta, fasta ] versions = ch_versions // [ versions.yml ] } From dbebd7a7a61684df03d9a71e19affb0f33d60d5b Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 14:55:24 +1300 Subject: [PATCH 02/23] Update gff_store.nf Correct indents and whitespaces --- subworkflows/local/gff_store.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/gff_store.nf b/subworkflows/local/gff_store.nf index 48e89c9..075863e 100644 --- a/subworkflows/local/gff_store.nf +++ b/subworkflows/local/gff_store.nf @@ -2,7 +2,7 @@ import java.net.URLEncoder include { GT_GFF3 as FINAL_GFF_CHECK } from '../../modules/nf-core/gt/gff3/main' include { GFFREAD as EXTRACT_PROTEINS } from '../../modules/nf-core/gffread/main' -include { GFFREAD as EXTRACT_CDS } from '../../modules/nf-core/gffread/main' +include { GFFREAD as EXTRACT_CDS } from '../../modules/nf-core/gffread/main' workflow GFF_STORE { take: @@ -134,7 +134,7 @@ workflow GFF_STORE { ch_final_proteins = EXTRACT_PROTEINS.out.gffread_fasta ch_versions = ch_versions.mix(EXTRACT_PROTEINS.out.versions.first()) -// MODULE: GFFREAD as EXTRACT_CDS + // MODULE: GFFREAD as EXTRACT_CDS ch_cds_extraction_inputs = ch_final_gff | join(ch_fasta) EXTRACT_CDS( From 203af9bd3852ad471fcede3ecb9511b745d182a8 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 15:20:45 +1300 Subject: [PATCH 03/23] Update gff_store.nf --- subworkflows/local/gff_store.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/gff_store.nf b/subworkflows/local/gff_store.nf index 075863e..13eed6f 100644 --- a/subworkflows/local/gff_store.nf +++ b/subworkflows/local/gff_store.nf @@ -135,12 +135,12 @@ workflow GFF_STORE { ch_versions = ch_versions.mix(EXTRACT_PROTEINS.out.versions.first()) // MODULE: GFFREAD as EXTRACT_CDS - ch_cds_extraction_inputs = ch_final_gff | 
join(ch_fasta) + ch_cds_extraction_inputs = ch_final_gff + | join(ch_fasta) EXTRACT_CDS( ch_cds_extraction_inputs.map { meta, gff, fasta -> [ meta, gff ] }, - ch_cds_extraction_inputs.map { meta, gff, fasta -> fasta }, - ext.args: '-w' // Extract CDS + ch_cds_extraction_inputs.map { meta, gff, fasta -> fasta } ) ch_final_cds = EXTRACT_CDS.out.gffread_fasta @@ -149,6 +149,6 @@ workflow GFF_STORE { emit: final_gff = ch_final_gff // [ meta, gff ] final_proteins = ch_final_proteins // [ meta, fasta ] - final_cds = ch_final_cds // [ meta, fasta ] + final_cds = ch_final_cds // [ meta, fasta ] versions = ch_versions // [ versions.yml ] } From 13fe4e9cb55d79e86d1ed328848fda59fc9d90c1 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 15:39:50 +1300 Subject: [PATCH 04/23] add EXTRACT_CDNA to gff_store.nf --- subworkflows/local/gff_store.nf | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/subworkflows/local/gff_store.nf b/subworkflows/local/gff_store.nf index 13eed6f..120d768 100644 --- a/subworkflows/local/gff_store.nf +++ b/subworkflows/local/gff_store.nf @@ -3,6 +3,7 @@ import java.net.URLEncoder include { GT_GFF3 as FINAL_GFF_CHECK } from '../../modules/nf-core/gt/gff3/main' include { GFFREAD as EXTRACT_PROTEINS } from '../../modules/nf-core/gffread/main' include { GFFREAD as EXTRACT_CDS } from '../../modules/nf-core/gffread/main' +include { GFFREAD as EXTRACT_CDNA } from '../../modules/nf-core/gffread/main' workflow GFF_STORE { take: @@ -146,9 +147,21 @@ workflow GFF_STORE { ch_final_cds = EXTRACT_CDS.out.gffread_fasta ch_versions = ch_versions.mix(EXTRACT_CDS.out.versions.first()) + // MODULE: GFFREAD as EXTRACT_CDNA + ch_cdna_extraction_inputs = ch_final_gff + | join(ch_fasta) + + EXTRACT_CDNA( + ch_cdna_extraction_inputs.map { meta, gff, fasta -> [ meta, gff ] }, + ch_cdna_extraction_inputs.map { meta, gff, fasta -> fasta} + + ch_final_cdna = EXTRACT_CDNA.out.gffread_fasta + ch_versions = ch_versions.mix(EXTRACT_CDNA.out.versions.first()) + 
emit: final_gff = ch_final_gff // [ meta, gff ] final_proteins = ch_final_proteins // [ meta, fasta ] final_cds = ch_final_cds // [ meta, fasta ] + final_cdna = ch_final_cdna // [ meta, fasta ] versions = ch_versions // [ versions.yml ] } From c7bf40b1ac4d1c1e7c994bd7a0eb59b24f37775e Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 15:41:04 +1300 Subject: [PATCH 05/23] add GFF_STORE:EXTRACT_CDNA to modules.config --- conf/modules.config | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index d68fac3..562b6a0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -306,7 +306,16 @@ process { // SUBWORKFLOW: GFF_STORE saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } + withName: '.*:GFF_STORE:EXTRACT_CDNA' { + ext.args = '-w' + ext.prefix = { "${meta.id}.cdna" } + publishDir = [ + path: { "${params.outdir}/annotations/$meta.id" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } } process { // SUBWORKFLOW: FASTA_ORTHOFINDER From f0699a60383a55cb606159ec069ce127ef704f41 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 15:42:55 +1300 Subject: [PATCH 06/23] Update gff_store.nf --- subworkflows/local/gff_store.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/gff_store.nf b/subworkflows/local/gff_store.nf index 120d768..91f845d 100644 --- a/subworkflows/local/gff_store.nf +++ b/subworkflows/local/gff_store.nf @@ -154,6 +154,7 @@ workflow GFF_STORE { EXTRACT_CDNA( ch_cdna_extraction_inputs.map { meta, gff, fasta -> [ meta, gff ] }, ch_cdna_extraction_inputs.map { meta, gff, fasta -> fasta} + ) ch_final_cdna = EXTRACT_CDNA.out.gffread_fasta ch_versions = ch_versions.mix(EXTRACT_CDNA.out.versions.first()) From 563bd13cdd9e50d535875ed12ace09a1b59b7874 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 16:37:24 +1300 Subject: [PATCH 07/23] Add cdna and cds outputs to output.md --- docs/output.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 44e598a..6c4c9c8 100644 --- a/docs/output.md +++ b/docs/output.md @@ -168,7 +168,9 @@ If more than one genome is included in the pipeline, [ORTHOFINDER](https://githu - `annotations/` - `Y/` - `Y.gt.gff3`: Final annotation file for genome `Y` which contains gene models and their functional annotations - - `Y.pep.fasta`: Protein sequences for the gene models + - `Y.pep.fasta`: Protein sequences for the gene models\ + - 'Y.cdna.fasta': cDNA sequences for the gene models\ + - 'Y.cds.fasta': Coding sequences for the gene models\ From 79befa552c36e73826f83b24fa9057644389dbee Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 16:41:15 +1300 Subject: [PATCH 08/23] Add notes about cdna and cds update to CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 79627bc..c4dc410 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 
+19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 10. Added an HTML report [#44](https://github.com/Plant-Food-Research-Open/genepal/issues/44) 11. Added content type as text/html for the MultiQC and genepal reports 12. Added sra-tools for RNASeq data download [#102](https://github.com/Plant-Food-Research-Open/genepal/issues/102) +13. Added cDNA and cds outputs to /annotations/ directory [#118](https://github.com/Plant-Food-Research-Open/genepal/issues/118) ### `Fixed` From 47e2cf33e592b11764e1aad81b0f9cd0b0fc8700 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 16:48:57 +1300 Subject: [PATCH 09/23] Added liamlelievre to contributors - README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 766d5fe..5e68c75 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,7 @@ The pipeline uses nf-core modules contributed by following authors: + ## Contributions and Support From 2a118be20202ef2ba9b6836f839159c8ed5a8227 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 17:02:15 +1300 Subject: [PATCH 10/23] Update output.md removed end of line "/" --- docs/output.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/output.md b/docs/output.md index 6c4c9c8..f4793b5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -168,9 +168,9 @@ If more than one genome is included in the pipeline, [ORTHOFINDER](https://githu - `annotations/` - `Y/` - `Y.gt.gff3`: Final annotation file for genome `Y` which contains gene models and their functional annotations - - `Y.pep.fasta`: Protein sequences for the gene models\ - - 'Y.cdna.fasta': cDNA sequences for the gene models\ - - 'Y.cds.fasta': Coding sequences for the gene models\ + - `Y.pep.fasta`: Protein sequences for the gene models + - 'Y.cdna.fasta': cDNA sequences for the gene models + - 'Y.cds.fasta': Coding sequences for the gene models From 216225c8d4977705315f6b1af539c9c79a522b0c Mon Sep 17 00:00:00 2001 
From: Liam Date: Wed, 4 Dec 2024 17:03:59 +1300 Subject: [PATCH 11/23] Added v0.6.0 notes to CHANGELOG.md --- CHANGELOG.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4dc410..67872fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v0.6.0 - [4-Dec-2024] + +### 'Added' + +1. Added cDNA and cds outputs to /annotations/ directory [#118](https://github.com/Plant-Food-Research-Open/genepal/issues/118) + ## v0.5.0 - [21-Nov-2024] ### `Added` @@ -19,8 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 10. Added an HTML report [#44](https://github.com/Plant-Food-Research-Open/genepal/issues/44) 11. Added content type as text/html for the MultiQC and genepal reports 12. Added sra-tools for RNASeq data download [#102](https://github.com/Plant-Food-Research-Open/genepal/issues/102) -13. Added cDNA and cds outputs to /annotations/ directory [#118](https://github.com/Plant-Food-Research-Open/genepal/issues/118) - + ### `Fixed` 1. 
Now using `${meta.id}_trim` as prefix for `FASTQC` files From 74cd2b2fafd5af099f2fc34b5dc6219288662e14 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 17:08:40 +1300 Subject: [PATCH 12/23] removed trailing whitespace gff_store.nf --- subworkflows/local/gff_store.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/gff_store.nf b/subworkflows/local/gff_store.nf index 91f845d..2ce008f 100644 --- a/subworkflows/local/gff_store.nf +++ b/subworkflows/local/gff_store.nf @@ -162,7 +162,7 @@ workflow GFF_STORE { emit: final_gff = ch_final_gff // [ meta, gff ] final_proteins = ch_final_proteins // [ meta, fasta ] - final_cds = ch_final_cds // [ meta, fasta ] + final_cds = ch_final_cds // [ meta, fasta ] final_cdna = ch_final_cdna // [ meta, fasta ] versions = ch_versions // [ versions.yml ] } From 81871ff0c7089047fb62de1d40c6fecc09790bd7 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 17:09:21 +1300 Subject: [PATCH 13/23] Removed trailing whitespace - modules.config --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 562b6a0..9bf1c5f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -295,7 +295,7 @@ process { // SUBWORKFLOW: GFF_STORE saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ] } - + withName: '.*:GFF_STORE:EXTRACT_CDS' { ext.args = '-x' ext.prefix = { "${meta.id}.cds" } From 3f898b0ee6f1a4fa7de22d5ec810629e79d73ce9 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 17:11:55 +1300 Subject: [PATCH 14/23] rename params - modules.config params.add_attrs_to_proteins_fasta to params.add_attrs_to_gffread_fastas --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 9bf1c5f..5d38b7d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -286,7 +286,7 @@ process { // SUBWORKFLOW: GFF_STORE } withName: '.*:GFF_STORE:EXTRACT_PROTEINS' { - ext.args = params.add_attrs_to_proteins_fasta ? '-F -D -y' : '-y' + ext.args = params.add_attrs_to_gffread_fastas ? '-F -D -y' : '-y' ext.prefix = { "${meta.id}.pep" } publishDir = [ From 5af18c85a3597d6d13bb026c8a622a543af800bd Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 4 Dec 2024 17:15:10 +1300 Subject: [PATCH 15/23] Rename params - nextflow.config Rename params.add_attrs_to_proteins_fasta to params.add_attrs_to_gffread_fastas --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 665e6b3..91c9962 100644 --- a/nextflow.config +++ b/nextflow.config @@ -57,7 +57,7 @@ params { // Annotation output options braker_save_outputs = false - add_attrs_to_proteins_fasta = false + add_attrs_to_gffread_fastas = false // Evaluation options busco_skip = false From 841ea020b7bcf632f6ce5a5ff1e30c8438c9ae89 Mon Sep 17 00:00:00 2001 From: Usman Rashid Date: Thu, 5 Dec 2024 12:21:32 +1300 Subject: [PATCH 16/23] Added code contributors --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e68c75..0cc92ea 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ sbatch ./pfr_genepal plant-food-research-open/genepal workflows were originally scripted by Jason Shiller 
([@jasonshiller](https://github.com/jasonshiller)). Usman Rashid ([@gallvp](https://github.com/gallvp)) wrote the Nextflow pipeline. -We thank the following people for their extensive assistance in the development of this pipeline: +We thank the following people for extensive assistance in the development of the pipeline, - Cecilia Deng [@CeciliaDeng](https://github.com/CeciliaDeng) - Charles David [@charlesdavid](https://github.com/charlesdavid) @@ -107,6 +107,10 @@ We thank the following people for their extensive assistance in the development - Susan Thomson [@cflsjt](https://github.com/cflsjt) - Ting-Hsuan Chen [@ting-hsuan-chen](https://github.com/ting-hsuan-chen) +and for contributions to the codebase, + +- Liam Le Lievre [@liamlelievre](https://github.com/liamlelievre) + The pipeline uses nf-core modules contributed by following authors: From e05a46915b363968f91777f2af47225adcca039c Mon Sep 17 00:00:00 2001 From: liamlelievre Date: Thu, 5 Dec 2024 14:24:52 +1300 Subject: [PATCH 17/23] Run nf-test successfully in minimal and stub --- .nf-core.yml | 2 +- CHANGELOG.md | 2 +- CITATION.cff | 2 +- assets/multiqc_config.yml | 2 +- conf/modules.config | 2 +- docs/parameters.md | 37 ++++++++++++++++++++------------- nextflow.config | 4 ++-- nextflow_schema.json | 12 +++++------ tests/minimal/main.nf.test.snap | 22 ++++++++++++++------ tests/stub/main.nf.test.snap | 26 +++++++++++++++++------ 10 files changed, 71 insertions(+), 40 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index 260014a..1641b31 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -30,5 +30,5 @@ template: outdir: . skip_features: - igenomes - version: 0.5.0 + version: 0.6.0 update: null diff --git a/CHANGELOG.md b/CHANGELOG.md index 67872fc..ada72ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 10. Added an HTML report [#44](https://github.com/Plant-Food-Research-Open/genepal/issues/44) 11. 
Added content type as text/html for the MultiQC and genepal reports 12. Added sra-tools for RNASeq data download [#102](https://github.com/Plant-Food-Research-Open/genepal/issues/102) - + ### `Fixed` 1. Now using `${meta.id}_trim` as prefix for `FASTQC` files diff --git a/CITATION.cff b/CITATION.cff index c5c6c26..4a2ad92 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -31,7 +31,7 @@ authors: - family-names: "Thomson" given-names: "Susan" title: "genepal: A Nextflow pipeline for genome and pan-genome annotation" -version: 0.5.0 +version: 0.6.0 date-released: 2024-11-21 url: "https://github.com/Plant-Food-Research-Open/genepal" doi: 10.5281/zenodo.14195006 diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 8969c04..6682724 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > This report has been generated by the plant-food-research-open/genepal analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "plant-food-research-open-genepal-methods-description": diff --git a/conf/modules.config b/conf/modules.config index 5d38b7d..ca741e9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -286,7 +286,7 @@ process { // SUBWORKFLOW: GFF_STORE } withName: '.*:GFF_STORE:EXTRACT_PROTEINS' { - ext.args = params.add_attrs_to_gffread_fastas ? '-F -D -y' : '-y' + ext.args = params.add_attrs_to_proteins_cds_fastas ? '-F -D -y' : '-y' ext.prefix = { "${meta.id}.pep" } publishDir = [ diff --git a/docs/parameters.md b/docs/parameters.md index 1c96a8b..c9eaa7e 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -4,17 +4,19 @@ A Nextflow pipeline for consensus, phased and pan-genome annotation. 
## Input/output options -| Parameter | Description | Type | Default | Required | Hidden | -| ------------------------- | -------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | -| `input` | Target assemblies listed in a CSV sheet | `string` | | True | | -| `protein_evidence` | Protein evidence provided as a fasta file or multiple fasta files listed in a plain txt file | `string` | | True | | -| `eggnogmapper_db_dir` | Eggnogmapper database directory | `string` | | | | -| `eggnogmapper_tax_scope` | Eggnogmapper taxonomy scopre. Eukaryota: 2759, Viridiplantae: 33090, Archaea: 2157, Bacteria: 2, root: 1 | `integer` | 1 | | | -| `rna_evidence` | FASTQ/BAM samples listed in a CSV sheet | `string` | | | | -| `liftoff_annotations` | Reference annotations listed in a CSV sheet | `string` | | | | -| `orthofinder_annotations` | Additional annotations for orthology listed in a CSV sheet | `string` | | | | -| `outdir` | The output directory where the results will be saved | `string` | | True | | -| `email` | Email address for completion summary. | `string` | | | True | +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------------ | -------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `input` | Target assemblies listed in a CSV sheet | `string` | | True | | +| `protein_evidence` | Protein evidence provided as a fasta file or multiple fasta files listed in a plain txt file | `string` | | True | | +| `eggnogmapper_db_dir` | Eggnogmapper database directory | `string` | | | | +| `eggnogmapper_tax_scope` | Eggnogmapper taxonomy scopre. 
Eukaryota: 2759, Viridiplantae: 33090, Archaea: 2157, Bacteria: 2, root: 1 | `integer` | 1 | | + +| +| `rna_evidence` | FASTQ/BAM samples listed in a CSV sheet | `string` | | | | +| `liftoff_annotations` | Reference annotations listed in a CSV sheet | `string` | | | | +| `orthofinder_annotations` | Additional annotations for orthology listed in a CSV sheet | `string` | | | | +| `outdir` | The output directory where the results will be saved | `string` | | True | | +| `email` | Email address for completion summary. | `string` | | | True | ## Repeat annotation options @@ -68,10 +70,9 @@ A Nextflow pipeline for consensus, phased and pan-genome annotation. ## Annotation output options -| Parameter | Description | Type | Default | Required | Hidden | -| ----------------------------- | ------------------------------------ | --------- | ------- | -------- | ------ | -| `braker_save_outputs` | Save BRAKER files | `boolean` | | | | -| `add_attrs_to_proteins_fasta` | Add gff attributes to proteins fasta | `boolean` | | | | +| Parameter | Description | Type | Default | Required | Hidden | +| --------------------- | ----------------- | --------- | ------- | -------- | ------ | +| `braker_save_outputs` | Save BRAKER files | `boolean` | | | | ## Evaluation options @@ -104,3 +105,9 @@ Less common options for the pipeline, typically set in a config file. | `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True | | `monochrome_logs` | Do not use coloured log outputs. 
| `boolean` | | | True | | `hook_url` | Incoming hook URL for messaging service | `string` | | | True | + +## Other parameters + +| Parameter | Description | Type | Default | Required | Hidden | +| ----------------------------- | ----------- | --------- | ------- | -------- | ------ | +| `add_attrs_to_gffread_fastas` | | `boolean` | | | | diff --git a/nextflow.config b/nextflow.config index 91c9962..c91980e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -57,7 +57,7 @@ params { // Annotation output options braker_save_outputs = false - add_attrs_to_gffread_fastas = false + add_attrs_to_proteins_cds_fastas = false // Evaluation options busco_skip = false @@ -261,7 +261,7 @@ manifest { description = """A Nextflow pipeline for consensus, phased and pan-genome annotation.""" mainScript = 'main.nf' nextflowVersion = '!>=24.04.2' - version = '0.5.0' + version = '0.6.0' doi = 'https://doi.org/10.5281/zenodo.14195006' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 0258683..2cc3f8d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -286,11 +286,6 @@ "type": "boolean", "description": "Save BRAKER files", "fa_icon": "fas fa-question-circle" - }, - "add_attrs_to_proteins_fasta": { - "type": "boolean", - "fa_icon": "fas fa-question-circle", - "description": "Add gff attributes to proteins fasta" } } }, @@ -438,5 +433,10 @@ { "$ref": "#/$defs/generic_options" } - ] + ], + "properties": { + "add_attrs_to_proteins_cds_fastas": { + "type": "boolean" + } + } } diff --git a/tests/minimal/main.nf.test.snap b/tests/minimal/main.nf.test.snap index 02c228f..b1f61c8 100644 --- a/tests/minimal/main.nf.test.snap +++ b/tests/minimal/main.nf.test.snap @@ -2,7 +2,7 @@ "profile - test": { "content": [ { - "successful tasks": 18, + "successful tasks": 20, "versions": { "AGAT_CONVERTSPGFF2GTF": { "agat": "v1.4.0" @@ -25,6 +25,12 @@ "CAT_PROTEIN_FASTAS": { "pigz": "2.3.4" }, + "EXTRACT_CDNA": { + "gffread": "0.12.7" + }, + "EXTRACT_CDS": { + "gffread": "0.12.7" 
+ }, "EXTRACT_PROTEINS": { "gffread": "0.12.7" }, @@ -55,10 +61,12 @@ "tsebra": "1.1.2.5" }, "Workflow": { - "plant-food-research-open/genepal": "v0.5.0" + "plant-food-research-open/genepal": "v0.6.0" } }, "stable paths": [ + "a_thaliana.cdna.fasta:md5,12b9bef973e488640aec8c04ba3882fe", + "a_thaliana.cds.fasta:md5,b81060419355a590560f92aec8536281", "a_thaliana.gt.gff3:md5,8ab16549095f605ff8715ac4a3de58ed", "a_thaliana.pep.fasta:md5,4994c0393ca0245a1c57966d846d101e", "a_thaliana.gff3:md5,d23d16cd86499d48a30ffb981ed27891", @@ -67,6 +75,8 @@ "stable names": [ "annotations", "annotations/a_thaliana", + "annotations/a_thaliana/a_thaliana.cdna.fasta", + "annotations/a_thaliana/a_thaliana.cds.fasta", "annotations/a_thaliana/a_thaliana.gt.gff3", "annotations/a_thaliana/a_thaliana.pep.fasta", "etc", @@ -81,9 +91,9 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.04.2" }, - "timestamp": "2024-11-19T11:35:02.477202" + "timestamp": "2024-12-05T07:51:43.818374" } -} +} \ No newline at end of file diff --git a/tests/stub/main.nf.test.snap b/tests/stub/main.nf.test.snap index 524ce21..3d3949b 100644 --- a/tests/stub/main.nf.test.snap +++ b/tests/stub/main.nf.test.snap @@ -2,7 +2,7 @@ "full - stub": { "content": [ { - "successful tasks": 154, + "successful tasks": 162, "versions": { "AGAT_CONVERTSPGFF2GTF": { "agat": "v1.4.0" @@ -55,6 +55,12 @@ "EGGNOGMAPPER": { "eggnog-mapper": "2.1.12" }, + "EXTRACT_CDNA": { + "gffread": "0.12.7" + }, + "EXTRACT_CDS": { + "gffread": "0.12.7" + }, "EXTRACT_PROTEINS": { "gffread": "0.12.7" }, @@ -143,25 +149,33 @@ "tsebra": "1.1.2.5" }, "Workflow": { - "plant-food-research-open/genepal": "v0.5.0" + "plant-food-research-open/genepal": "v0.6.0" } }, "stable paths": [ + "donghong.cdna.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "donghong.cds.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", "donghong.emapper.annotations:md5,d41d8cd98f00b204e9800998ecf8427e", 
"donghong.emapper.hits:md5,d41d8cd98f00b204e9800998ecf8427e", "donghong.emapper.seed_orthologs:md5,d41d8cd98f00b204e9800998ecf8427e", "donghong.gt.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", "donghong.pep.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "red5_v2p1.cdna.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "red5_v2p1.cds.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v2p1.emapper.annotations:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v2p1.emapper.hits:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v2p1.emapper.seed_orthologs:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v2p1.gt.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v2p1.pep.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "red5_v3.cdna.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "red5_v3.cds.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v3.emapper.annotations:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v3.emapper.hits:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v3.emapper.seed_orthologs:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v3.gt.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v3.pep.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "red7_v5.cdna.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "red7_v5.cds.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", "red7_v5.emapper.annotations:md5,d41d8cd98f00b204e9800998ecf8427e", "red7_v5.emapper.hits:md5,d41d8cd98f00b204e9800998ecf8427e", "red7_v5.emapper.seed_orthologs:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -188,9 +202,9 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.04.2" }, - "timestamp": "2024-11-21T12:34:14.056074" + "timestamp": "2024-12-05T07:56:38.915238" } -} +} \ No newline at end of file From d2ff47e882f580cdbccd6a18b53bfa66d50b9f3e Mon Sep 17 00:00:00 2001 From: liamlelievre Date: Thu, 5 Dec 2024 14:41:31 +1300 Subject: [PATCH 18/23] Run nf-test successfully in minimal and stub, renamed attr, updated docs --- docs/parameters.md | 142 
++++++++++++++++++++++++------------------- nextflow_schema.json | 12 ++-- 2 files changed, 84 insertions(+), 70 deletions(-) diff --git a/docs/parameters.md b/docs/parameters.md index c9eaa7e..ef2bce2 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -4,19 +4,20 @@ A Nextflow pipeline for consensus, phased and pan-genome annotation. ## Input/output options -| Parameter | Description | Type | Default | Required | Hidden | -| ------------------------ | -------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | -| `input` | Target assemblies listed in a CSV sheet | `string` | | True | | -| `protein_evidence` | Protein evidence provided as a fasta file or multiple fasta files listed in a plain txt file | `string` | | True | | -| `eggnogmapper_db_dir` | Eggnogmapper database directory | `string` | | | | -| `eggnogmapper_tax_scope` | Eggnogmapper taxonomy scopre. Eukaryota: 2759, Viridiplantae: 33090, Archaea: 2157, Bacteria: 2, root: 1 | `integer` | 1 | | - -| -| `rna_evidence` | FASTQ/BAM samples listed in a CSV sheet | `string` | | | | -| `liftoff_annotations` | Reference annotations listed in a CSV sheet | `string` | | | | -| `orthofinder_annotations` | Additional annotations for orthology listed in a CSV sheet | `string` | | | | -| `outdir` | The output directory where the results will be saved | `string` | | True | | -| `email` | Email address for completion summary. 
| `string` | | | True | +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------------------------------ | ------------------------------------------------------------------------ | -------- | ------- | -------- | ------ | +| `input` | Target assemblies listed in a CSV sheet | `string` | | True | | +| `protein_evidence` | Protein evidence provided as a fasta file or multiple fasta files listed | +| in a plain txt file | `string` | | True | | +| `eggnogmapper_db_dir` | Eggnogmapper database directory | `string` | | | | +| `eggnogmapper_tax_scope` | Eggnogmapper taxonomy scopre. Eukaryota: 2759, Viridiplantae: | +| 33090, Archaea: 2157, Bacteria: 2, root: 1 | `integer` | 1 | | | +| `rna_evidence` | FASTQ/BAM samples listed in a CSV sheet | `string` | | | | +| `liftoff_annotations` | Reference annotations listed in a CSV sheet | `string` | | | | +| `orthofinder_annotations` | Additional annotations for orthology listed in a CSV sheet | +| `string` | | | | +| `outdir` | The output directory where the results will be saved | `string` | | True | | +| `email` | Email address for completion summary. | `string` | | | True | ## Repeat annotation options @@ -29,68 +30,84 @@ A Nextflow pipeline for consensus, phased and pan-genome annotation. ## RNASeq pre-processing options -| Parameter | Description | Type | Default | Required | Hidden | -| ------------------------ | ------------------------------------------------------------------ | --------- | ----------------------------------------- | -------- | ------ | -| `fastqc_skip` | Skip FASTQC or not? | `boolean` | True | | | -| `fastp_skip` | Skip trimming by FASTQP or not? | `boolean` | | | | -| `min_trimmed_reads` | Exclude a sample if its reads after trimming are below this number | `integer` | 10000 | | | -| `fastp_extra_args` | Extra FASTP arguments | `string` | | | | -| `save_trimmed` | Save FASTQ files after trimming or not? 
| `boolean` | | | | -| `remove_ribo_rna` | Remove Ribosomal RNA or not? | `boolean` | | | | -| `save_non_ribo_reads` | Save FASTQ files after Ribosomal RNA removal or not? | `boolean` | | | | -| `ribo_database_manifest` | Ribosomal RNA fastas listed in a text sheet | `string` | ${projectDir}/assets/rrna-db-defaults.txt | | | +| Parameter | Description | Type | Default | Required | Hidden | +| ----------------------------------------- | ------------------------------------------------------------------ | --------- | ------- | -------- | ------ | +| `fastqc_skip` | Skip FASTQC or not? | `boolean` | True | | | +| `fastp_skip` | Skip trimming by FASTQP or not? | `boolean` | | | | +| `min_trimmed_reads` | Exclude a sample if its reads after trimming are below this number | +| `integer` | 10000 | | | +| `fastp_extra_args` | Extra FASTP arguments | `string` | | | | +| `save_trimmed` | Save FASTQ files after trimming or not? | `boolean` | | | | +| `remove_ribo_rna` | Remove Ribosomal RNA or not? | `boolean` | | | | +| `save_non_ribo_reads` | Save FASTQ files after Ribosomal RNA removal or not? | `boolean` | | +| | +| `ribo_database_manifest` | Ribosomal RNA fastas listed in a text sheet | `string` | +| ${projectDir}/assets/rrna-db-defaults.txt | | | ## RNASeq alignment options -| Parameter | Description | Type | Default | Required | Hidden | -| ------------------------ | ------------------------------------------------- | --------- | ------- | -------- | ------ | -| `star_max_intron_length` | Maximum intron length for STAR alignment | `integer` | 16000 | | | -| `star_align_extra_args` | EXTRA arguments for STAR | `string` | | | | -| `star_save_outputs` | Save BAM files from STAR or not? | `boolean` | | | | -| `save_cat_bam` | SAVE a concatenated BAM file per assembly or not? 
| `boolean` | | | | +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------------ | ---------------------------------------- | --------- | ------- | -------- | ------ | +| `star_max_intron_length` | Maximum intron length for STAR alignment | `integer` | 16000 | | + +| +| `star_align_extra_args` | EXTRA arguments for STAR | `string` | | | | +| `star_save_outputs` | Save BAM files from STAR or not? | `boolean` | | | | +| `save_cat_bam` | SAVE a concatenated BAM file per assembly or not? | `boolean` | | | | ## Annotation options -| Parameter | Description | Type | Default | Required | Hidden | -| --------------------- | --------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | -| `braker_extra_args` | Extra arguments for BRAKER | `string` | | | | -| `liftoff_coverage` | Liftoff coverage parameter | `number` | 0.9 | | | -| `liftoff_identity` | Liftoff identity parameter | `number` | 0.9 | | | -| `eggnogmapper_evalue` | Only report alignments below or equal the e-value threshold | `number` | 1e-05 | | | -| `eggnogmapper_pident` | Only report alignments above or equal to the given percentage of identity (0-100) | `integer` | 35 | | | +| Parameter | Description | Type | Default | Required | Hidden | +| --------------------- | ---------------------------------------------------------------- | -------- | ------- | -------- | ------ | +| `braker_extra_args` | Extra arguments for BRAKER | `string` | | | | +| `liftoff_coverage` | Liftoff coverage parameter | `number` | 0.9 | | | +| `liftoff_identity` | Liftoff identity parameter | `number` | 0.9 | | | +| `eggnogmapper_evalue` | Only report alignments below or equal the e-value threshold | `number` | +| 1e-05 | | | +| `eggnogmapper_pident` | Only report alignments above or equal to the given percentage of | +| identity (0-100) | `integer` | 35 | | | ## Post-annotation filtering options -| Parameter | Description | Type | Default 
| Required | Hidden | -| ----------------------------- | ----------------------------------------------------------------- | --------- | ------- | -------- | ------ | -| `allow_isoforms` | Allow multiple isoforms for gene models | `boolean` | True | | | -| `enforce_full_intron_support` | Require every model to have external evidence for all its introns | `boolean` | True | | | -| `filter_liftoff_by_hints` | Use BRAKER hints to filter Liftoff models | `boolean` | True | | | -| `eggnogmapper_purge_nohits` | Purge transcripts which do not have a hit against eggnog | `boolean` | | | | +| Parameter | Description | Type | Default | Required | Hidden | +| ----------------------------- | --------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `allow_isoforms` | Allow multiple isoforms for gene models | `boolean` | True | | | +| `enforce_full_intron_support` | Require every model to have external evidence for all its | +| introns | `boolean` | True | | | +| `filter_liftoff_by_hints` | Use BRAKER hints to filter Liftoff models | `boolean` | True | | + +| +| `eggnogmapper_purge_nohits` | Purge transcripts which do not have a hit against eggnog | +`boolean` | | | | ## Annotation output options -| Parameter | Description | Type | Default | Required | Hidden | -| --------------------- | ----------------- | --------- | ------- | -------- | ------ | -| `braker_save_outputs` | Save BRAKER files | `boolean` | | | | +| Parameter | Description | Type | Default | Required | Hidden | +| ---------------------------------- | --------------------------------------------- | --------- | ------- | -------- | ------ | +| `braker_save_outputs` | Save BRAKER files | `boolean` | | | | +| `add_attrs_to_proteins_cds_fastas` | Add gff attributes to proteins/cDNA/cds fasta | `boolean` | +| | | | ## Evaluation options -| Parameter | Description | Type | Default | Required | Hidden | -| ------------------------ | 
--------------------------------------------------------------------------- | --------- | --------------- | -------- | ------ | -| `busco_skip` | Skip evaluation by BUSCO | `boolean` | | | | -| `busco_lineage_datasets` | BUSCO lineages as a space-separated list: 'fungi_odb10 microsporidia_odb10' | `string` | eukaryota_odb10 | | | +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------------ | ------------------------------------------------------ | --------------- | ------- | -------- | ------ | +| `busco_skip` | Skip evaluation by BUSCO | `boolean` | | | | +| `busco_lineage_datasets` | BUSCO lineages as a space-separated list: 'fungi_odb10 | +| microsporidia_odb10' | `string` | eukaryota_odb10 | | | ## Institutional config options Parameters used to describe centralised config profiles. These should not be edited. -| Parameter | Description | Type | Default | Required | Hidden | -| ---------------------------- | ----------------------------------------- | -------- | -------------------------------------------------------- | -------- | ------ | -| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True | -| `custom_config_base` | Base directory for Institutional configs. | `string` | https://raw.githubusercontent.com/nf-core/configs/master | | True | -| `config_profile_name` | Institutional config name. | `string` | | | True | -| `config_profile_description` | Institutional config description. | `string` | | | True | +| Parameter | Description | Type | Default | Required | Hidden | +| -------------------------------------------------------- | ----------------------------------------- | -------- | ------- | -------- | ------ | +| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | +| True | +| `custom_config_base` | Base directory for Institutional configs. 
| `string` | +| https://raw.githubusercontent.com/nf-core/configs/master | | True | +| `config_profile_name` | Institutional config name. | `string` | | | True | +| `config_profile_description` | Institutional config description. | `string` | | | True | ## Generic options @@ -99,15 +116,12 @@ Less common options for the pipeline, typically set in a config file. | Parameter | Description | Type | Default | Required | Hidden | | ------------------------ | ----------------------------------------------------------------- | --------- | ------- | -------- | ------ | | `version` | Display version and exit. | `boolean` | | | True | -| `publish_dir_mode` | Method used to save pipeline results to output directory. | `string` | copy | | True | -| `email_on_fail` | Email address for completion summary, only when pipeline fails. | `string` | | | True | +| `publish_dir_mode` | Method used to save pipeline results to output directory. | `string` | +| copy | | True | +| `email_on_fail` | Email address for completion summary, only when pipeline fails. | `string` | +| | True | | `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True | -| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True | +| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | +| `string` | 25.MB | | True | | `monochrome_logs` | Do not use coloured log outputs. 
| `boolean` | | | True | | `hook_url` | Incoming hook URL for messaging service | `string` | | | True | - -## Other parameters - -| Parameter | Description | Type | Default | Required | Hidden | -| ----------------------------- | ----------- | --------- | ------- | -------- | ------ | -| `add_attrs_to_gffread_fastas` | | `boolean` | | | | diff --git a/nextflow_schema.json b/nextflow_schema.json index 2cc3f8d..2092f00 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -286,6 +286,11 @@ "type": "boolean", "description": "Save BRAKER files", "fa_icon": "fas fa-question-circle" + }, + "add_attrs_to_proteins_cds_fastas": { + "type": "boolean", + "description": "Add gff attributes to proteins/cDNA/cds fasta", + "fa_icon": "fas fa-question-circle" } } }, @@ -433,10 +438,5 @@ { "$ref": "#/$defs/generic_options" } - ], - "properties": { - "add_attrs_to_proteins_cds_fastas": { - "type": "boolean" - } - } + ] } From d21d70e282f7dd6a05d30706bb807813f7099d9c Mon Sep 17 00:00:00 2001 From: liamlelievre Date: Thu, 5 Dec 2024 15:00:06 +1300 Subject: [PATCH 19/23] Add attributes option for -F -D to cds and cdna --- conf/modules.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index ca741e9..8e98b4f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -297,7 +297,7 @@ process { // SUBWORKFLOW: GFF_STORE } withName: '.*:GFF_STORE:EXTRACT_CDS' { - ext.args = '-x' + ext.args = params.add_attrs_to_proteins_cds_fastas ? '-F -D -x' : '-x' ext.prefix = { "${meta.id}.cds" } publishDir = [ @@ -307,7 +307,7 @@ process { // SUBWORKFLOW: GFF_STORE ] } withName: '.*:GFF_STORE:EXTRACT_CDNA' { - ext.args = '-w' + ext.args = params.add_attrs_to_proteins_cds_fastas ? 
'-F -D -w' : '-w' ext.prefix = { "${meta.id}.cdna" } publishDir = [ From 9be84b246b108972b5136ab8ec555969a24e7ec3 Mon Sep 17 00:00:00 2001 From: Usman Rashid Date: Thu, 5 Dec 2024 16:35:03 +1300 Subject: [PATCH 20/23] Fixed linting issues --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 2 +- docs/parameters.md | 129 ++++++++++++++++----------------------- nextflow_schema.json | 2 +- 4 files changed, 57 insertions(+), 78 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a4e5cc9..5a59fb5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: uses: actions/checkout@v4.2.1 - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@v2.0.0 with: version: "${{ matrix.NXF_VER }}" diff --git a/CHANGELOG.md b/CHANGELOG.md index ada72ae..1e2d164 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### 'Added' -1. Added cDNA and cds outputs to /annotations/ directory [#118](https://github.com/Plant-Food-Research-Open/genepal/issues/118) +1. Added cDNA and CDS outputs to /annotations/ directory [#118](https://github.com/Plant-Food-Research-Open/genepal/issues/118) ## v0.5.0 - [21-Nov-2024] diff --git a/docs/parameters.md b/docs/parameters.md index ef2bce2..9297c4a 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -4,20 +4,17 @@ A Nextflow pipeline for consensus, phased and pan-genome annotation. 
## Input/output options -| Parameter | Description | Type | Default | Required | Hidden | -| ------------------------------------------ | ------------------------------------------------------------------------ | -------- | ------- | -------- | ------ | -| `input` | Target assemblies listed in a CSV sheet | `string` | | True | | -| `protein_evidence` | Protein evidence provided as a fasta file or multiple fasta files listed | -| in a plain txt file | `string` | | True | | -| `eggnogmapper_db_dir` | Eggnogmapper database directory | `string` | | | | -| `eggnogmapper_tax_scope` | Eggnogmapper taxonomy scopre. Eukaryota: 2759, Viridiplantae: | -| 33090, Archaea: 2157, Bacteria: 2, root: 1 | `integer` | 1 | | | -| `rna_evidence` | FASTQ/BAM samples listed in a CSV sheet | `string` | | | | -| `liftoff_annotations` | Reference annotations listed in a CSV sheet | `string` | | | | -| `orthofinder_annotations` | Additional annotations for orthology listed in a CSV sheet | -| `string` | | | | -| `outdir` | The output directory where the results will be saved | `string` | | True | | -| `email` | Email address for completion summary. | `string` | | | True | +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------------- | -------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `input` | Target assemblies listed in a CSV sheet | `string` | | True | | +| `protein_evidence` | Protein evidence provided as a fasta file or multiple fasta files listed in a plain txt file | `string` | | True | | +| `eggnogmapper_db_dir` | Eggnogmapper database directory | `string` | | | | +| `eggnogmapper_tax_scope` | Eggnogmapper taxonomy scope.
Eukaryota: 2759, Viridiplantae: 33090, Archaea: 2157, Bacteria: 2, root: 1 | `integer` | 1 | | | +| `rna_evidence` | FASTQ/BAM samples listed in a CSV sheet | `string` | | | | +| `liftoff_annotations` | Reference annotations listed in a CSV sheet | `string` | | | | +| `orthofinder_annotations` | Additional annotations for orthology listed in a CSV sheet | `string` | | | | +| `outdir` | The output directory where the results will be saved | `string` | | True | | +| `email` | Email address for completion summary. | `string` | | | True | ## Repeat annotation options @@ -30,84 +27,69 @@ A Nextflow pipeline for consensus, phased and pan-genome annotation. ## RNASeq pre-processing options -| Parameter | Description | Type | Default | Required | Hidden | -| ----------------------------------------- | ------------------------------------------------------------------ | --------- | ------- | -------- | ------ | -| `fastqc_skip` | Skip FASTQC or not? | `boolean` | True | | | -| `fastp_skip` | Skip trimming by FASTQP or not? | `boolean` | | | | -| `min_trimmed_reads` | Exclude a sample if its reads after trimming are below this number | -| `integer` | 10000 | | | -| `fastp_extra_args` | Extra FASTP arguments | `string` | | | | -| `save_trimmed` | Save FASTQ files after trimming or not? | `boolean` | | | | -| `remove_ribo_rna` | Remove Ribosomal RNA or not? | `boolean` | | | | -| `save_non_ribo_reads` | Save FASTQ files after Ribosomal RNA removal or not? | `boolean` | | -| | -| `ribo_database_manifest` | Ribosomal RNA fastas listed in a text sheet | `string` | -| ${projectDir}/assets/rrna-db-defaults.txt | | | +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------------ | ------------------------------------------------------------------ | --------- | ----------------------------------------- | -------- | ------ | +| `fastqc_skip` | Skip FASTQC or not? | `boolean` | True | | | +| `fastp_skip` | Skip trimming by FASTP or not?
| `boolean` | | | | +| `min_trimmed_reads` | Exclude a sample if its reads after trimming are below this number | `integer` | 10000 | | | +| `fastp_extra_args` | Extra FASTP arguments | `string` | | | | +| `save_trimmed` | Save FASTQ files after trimming or not? | `boolean` | | | | +| `remove_ribo_rna` | Remove Ribosomal RNA or not? | `boolean` | | | | +| `save_non_ribo_reads` | Save FASTQ files after Ribosomal RNA removal or not? | `boolean` | | | | +| `ribo_database_manifest` | Ribosomal RNA fastas listed in a text sheet | `string` | ${projectDir}/assets/rrna-db-defaults.txt | | | ## RNASeq alignment options -| Parameter | Description | Type | Default | Required | Hidden | -| ------------------------ | ---------------------------------------- | --------- | ------- | -------- | ------ | -| `star_max_intron_length` | Maximum intron length for STAR alignment | `integer` | 16000 | | - -| -| `star_align_extra_args` | EXTRA arguments for STAR | `string` | | | | -| `star_save_outputs` | Save BAM files from STAR or not? | `boolean` | | | | -| `save_cat_bam` | SAVE a concatenated BAM file per assembly or not? | `boolean` | | | | +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------------ | ------------------------------------------------- | --------- | ------- | -------- | ------ | +| `star_max_intron_length` | Maximum intron length for STAR alignment | `integer` | 16000 | | | +| `star_align_extra_args` | EXTRA arguments for STAR | `string` | | | | +| `star_save_outputs` | Save BAM files from STAR or not? | `boolean` | | | | +| `save_cat_bam` | SAVE a concatenated BAM file per assembly or not? 
| `boolean` | | | | ## Annotation options -| Parameter | Description | Type | Default | Required | Hidden | -| --------------------- | ---------------------------------------------------------------- | -------- | ------- | -------- | ------ | -| `braker_extra_args` | Extra arguments for BRAKER | `string` | | | | -| `liftoff_coverage` | Liftoff coverage parameter | `number` | 0.9 | | | -| `liftoff_identity` | Liftoff identity parameter | `number` | 0.9 | | | -| `eggnogmapper_evalue` | Only report alignments below or equal the e-value threshold | `number` | -| 1e-05 | | | -| `eggnogmapper_pident` | Only report alignments above or equal to the given percentage of | -| identity (0-100) | `integer` | 35 | | | +| Parameter | Description | Type | Default | Required | Hidden | +| --------------------- | --------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `braker_extra_args` | Extra arguments for BRAKER | `string` | | | | +| `liftoff_coverage` | Liftoff coverage parameter | `number` | 0.9 | | | +| `liftoff_identity` | Liftoff identity parameter | `number` | 0.9 | | | +| `eggnogmapper_evalue` | Only report alignments below or equal the e-value threshold | `number` | 1e-05 | | | +| `eggnogmapper_pident` | Only report alignments above or equal to the given percentage of identity (0-100) | `integer` | 35 | | | ## Post-annotation filtering options -| Parameter | Description | Type | Default | Required | Hidden | -| ----------------------------- | --------------------------------------------------------- | --------- | ------- | -------- | ------ | -| `allow_isoforms` | Allow multiple isoforms for gene models | `boolean` | True | | | -| `enforce_full_intron_support` | Require every model to have external evidence for all its | -| introns | `boolean` | True | | | -| `filter_liftoff_by_hints` | Use BRAKER hints to filter Liftoff models | `boolean` | True | | - -| -| `eggnogmapper_purge_nohits` | Purge 
transcripts which do not have a hit against eggnog | -`boolean` | | | | +| Parameter | Description | Type | Default | Required | Hidden | +| ----------------------------- | ----------------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `allow_isoforms` | Allow multiple isoforms for gene models | `boolean` | True | | | +| `enforce_full_intron_support` | Require every model to have external evidence for all its introns | `boolean` | True | | | +| `filter_liftoff_by_hints` | Use BRAKER hints to filter Liftoff models | `boolean` | True | | | +| `eggnogmapper_purge_nohits` | Purge transcripts which do not have a hit against eggnog | `boolean` | | | | ## Annotation output options | Parameter | Description | Type | Default | Required | Hidden | | ---------------------------------- | --------------------------------------------- | --------- | ------- | -------- | ------ | | `braker_save_outputs` | Save BRAKER files | `boolean` | | | | -| `add_attrs_to_proteins_cds_fastas` | Add gff attributes to proteins/cDNA/cds fasta | `boolean` | -| | | | +| `add_attrs_to_proteins_cds_fastas` | Add gff attributes to proteins/cDNA/CDS fasta | `boolean` | | | | ## Evaluation options -| Parameter | Description | Type | Default | Required | Hidden | -| ------------------------ | ------------------------------------------------------ | --------------- | ------- | -------- | ------ | -| `busco_skip` | Skip evaluation by BUSCO | `boolean` | | | | -| `busco_lineage_datasets` | BUSCO lineages as a space-separated list: 'fungi_odb10 | -| microsporidia_odb10' | `string` | eukaryota_odb10 | | | +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------------ | --------------------------------------------------------------------------- | --------- | --------------- | -------- | ------ | +| `busco_skip` | Skip evaluation by BUSCO | `boolean` | | | | +| `busco_lineage_datasets` | BUSCO lineages as a space-separated list: 
'fungi_odb10 microsporidia_odb10' | `string` | eukaryota_odb10 | | | ## Institutional config options Parameters used to describe centralised config profiles. These should not be edited. -| Parameter | Description | Type | Default | Required | Hidden | -| -------------------------------------------------------- | ----------------------------------------- | -------- | ------- | -------- | ------ | -| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | -| True | -| `custom_config_base` | Base directory for Institutional configs. | `string` | -| https://raw.githubusercontent.com/nf-core/configs/master | | True | -| `config_profile_name` | Institutional config name. | `string` | | | True | -| `config_profile_description` | Institutional config description. | `string` | | | True | +| Parameter | Description | Type | Default | Required | Hidden | +| ---------------------------- | ----------------------------------------- | -------- | -------------------------------------------------------- | -------- | ------ | +| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True | +| `custom_config_base` | Base directory for Institutional configs. | `string` | https://raw.githubusercontent.com/nf-core/configs/master | | True | +| `config_profile_name` | Institutional config name. | `string` | | | True | +| `config_profile_description` | Institutional config description. | `string` | | | True | ## Generic options @@ -116,12 +98,9 @@ Less common options for the pipeline, typically set in a config file. | Parameter | Description | Type | Default | Required | Hidden | | ------------------------ | ----------------------------------------------------------------- | --------- | ------- | -------- | ------ | | `version` | Display version and exit. | `boolean` | | | True | -| `publish_dir_mode` | Method used to save pipeline results to output directory. 
| `string` | -| copy | | True | -| `email_on_fail` | Email address for completion summary, only when pipeline fails. | `string` | -| | True | +| `publish_dir_mode` | Method used to save pipeline results to output directory. | `string` | copy | | True | +| `email_on_fail` | Email address for completion summary, only when pipeline fails. | `string` | | | True | | `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True | -| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | -| `string` | 25.MB | | True | +| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True | | `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True | | `hook_url` | Incoming hook URL for messaging service | `string` | | | True | diff --git a/nextflow_schema.json b/nextflow_schema.json index 2092f00..b7b5cc4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -289,7 +289,7 @@ }, "add_attrs_to_proteins_cds_fastas": { "type": "boolean", - "description": "Add gff attributes to proteins/cDNA/cds fasta", + "description": "Add gff attributes to proteins/cDNA/CDS fasta", "fa_icon": "fas fa-question-circle" } } From 767239a0e048ade192c4ce35e1314de1a2bef541 Mon Sep 17 00:00:00 2001 From: Usman Rashid Date: Thu, 5 Dec 2024 16:37:20 +1300 Subject: [PATCH 21/23] Updated snapshot --- tests/short/main.nf.test.snap | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/short/main.nf.test.snap b/tests/short/main.nf.test.snap index 62a4ed3..46055d1 100644 --- a/tests/short/main.nf.test.snap +++ b/tests/short/main.nf.test.snap @@ -5,11 +5,11 @@ "successful tasks": 0, "versions": { "Workflow": { - "plant-food-research-open/genepal": "v0.5.0" + "plant-food-research-open/genepal": "v0.6.0" } }, "stable paths": [ - + ] } ], @@ -17,6 +17,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-22T11:39:43.110621" + 
"timestamp": "2024-12-05T16:37:07.37961" } -} +} \ No newline at end of file From e04261544864039eb2a0a5584d977f43d0031df4 Mon Sep 17 00:00:00 2001 From: Usman Rashid Date: Thu, 5 Dec 2024 16:39:16 +1300 Subject: [PATCH 22/23] Fixed nextflow-setup version --- .github/workflows/download_pipeline.yml | 2 +- .github/workflows/linting.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 713dc3e..4f109b3 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -30,7 +30,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@v2.0.0 - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 6bfe937..9148360 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -34,7 +34,7 @@ jobs: uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@v2.0.0 - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: From 43742fb84e9ec41655ac4739e6e2fa95468cbdff Mon Sep 17 00:00:00 2001 From: Usman Rashid Date: Thu, 5 Dec 2024 16:43:05 +1300 Subject: [PATCH 23/23] Fixed indent --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index c91980e..363f0c5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -57,7 +57,7 @@ params { // Annotation output options braker_save_outputs = false - add_attrs_to_proteins_cds_fastas = false + add_attrs_to_proteins_cds_fastas = false // Evaluation options busco_skip = false