diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a4e5cc9..5a59fb5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: uses: actions/checkout@v4.2.1 - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@v2.0.0 with: version: "${{ matrix.NXF_VER }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 713dc3e..4f109b3 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -30,7 +30,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@v2.0.0 - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 6bfe937..9148360 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -34,7 +34,7 @@ jobs: uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@v2.0.0 - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: diff --git a/.nf-core.yml b/.nf-core.yml index 260014a..1641b31 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -30,5 +30,5 @@ template: outdir: . skip_features: - igenomes - version: 0.5.0 + version: 0.6.0 update: null diff --git a/CHANGELOG.md b/CHANGELOG.md index 79627bc..1e2d164 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v0.6.0 - [4-Dec-2024] + +### `Added` + +1. 
Added cDNA and CDS outputs to /annotations/ directory [#118](https://github.com/Plant-Food-Research-Open/genepal/issues/118) + ## v0.5.0 - [21-Nov-2024] ### `Added` diff --git a/CITATION.cff b/CITATION.cff index c5c6c26..4a2ad92 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -31,7 +31,7 @@ authors: - family-names: "Thomson" given-names: "Susan" title: "genepal: A Nextflow pipeline for genome and pan-genome annotation" -version: 0.5.0 +version: 0.6.0 date-released: 2024-11-21 url: "https://github.com/Plant-Food-Research-Open/genepal" doi: 10.5281/zenodo.14195006 diff --git a/README.md b/README.md index 766d5fe..0cc92ea 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ sbatch ./pfr_genepal plant-food-research-open/genepal workflows were originally scripted by Jason Shiller ([@jasonshiller](https://github.com/jasonshiller)). Usman Rashid ([@gallvp](https://github.com/gallvp)) wrote the Nextflow pipeline. -We thank the following people for their extensive assistance in the development of this pipeline: +We thank the following people for extensive assistance in the development of the pipeline, - Cecilia Deng [@CeciliaDeng](https://github.com/CeciliaDeng) - Charles David [@charlesdavid](https://github.com/charlesdavid) @@ -107,6 +107,10 @@ We thank the following people for their extensive assistance in the development - Susan Thomson [@cflsjt](https://github.com/cflsjt) - Ting-Hsuan Chen [@ting-hsuan-chen](https://github.com/ting-hsuan-chen) +and for contributions to the codebase, + +- Liam Le Lievre [@liamlelievre](https://github.com/liamlelievre) + The pipeline uses nf-core modules contributed by following authors: @@ -139,6 +143,7 @@ The pipeline uses nf-core modules contributed by following authors: + ## Contributions and Support diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 8969c04..6682724 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > This report has been generated by the 
plant-food-research-open/genepal analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "plant-food-research-open-genepal-methods-description": diff --git a/conf/modules.config b/conf/modules.config index ba2bb72..8e98b4f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -286,7 +286,7 @@ process { // SUBWORKFLOW: GFF_STORE } withName: '.*:GFF_STORE:EXTRACT_PROTEINS' { - ext.args = params.add_attrs_to_proteins_fasta ? '-F -D -y' : '-y' + ext.args = params.add_attrs_to_proteins_cds_fastas ? '-F -D -y' : '-y' ext.prefix = { "${meta.id}.pep" } publishDir = [ @@ -295,6 +295,27 @@ process { // SUBWORKFLOW: GFF_STORE saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } + + withName: '.*:GFF_STORE:EXTRACT_CDS' { + ext.args = params.add_attrs_to_proteins_cds_fastas ? '-F -D -x' : '-x' + ext.prefix = { "${meta.id}.cds" } + + publishDir = [ + path: { "${params.outdir}/annotations/$meta.id" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + withName: '.*:GFF_STORE:EXTRACT_CDNA' { + ext.args = params.add_attrs_to_proteins_cds_fastas ? '-F -D -w' : '-w' + ext.prefix = { "${meta.id}.cdna" } + + publishDir = [ + path: { "${params.outdir}/annotations/$meta.id" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } } process { // SUBWORKFLOW: FASTA_ORTHOFINDER diff --git a/docs/output.md b/docs/output.md index 44e598a..f4793b5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -169,6 +169,8 @@ If more than one genome is included in the pipeline, [ORTHOFINDER](https://githu - `Y/` - `Y.gt.gff3`: Final annotation file for genome `Y` which contains gene models and their functional annotations - `Y.pep.fasta`: Protein sequences for the gene models + - `Y.cdna.fasta`: cDNA sequences for the gene models + - `Y.cds.fasta`: Coding sequences for the gene models diff --git a/docs/parameters.md b/docs/parameters.md index 1c96a8b..9297c4a 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -68,10 +68,10 @@ A Nextflow pipeline for consensus, phased and pan-genome annotation. ## Annotation output options -| Parameter | Description | Type | Default | Required | Hidden | -| ----------------------------- | ------------------------------------ | --------- | ------- | -------- | ------ | -| `braker_save_outputs` | Save BRAKER files | `boolean` | | | | -| `add_attrs_to_proteins_fasta` | Add gff attributes to proteins fasta | `boolean` | | | | +| Parameter | Description | Type | Default | Required | Hidden | +| ---------------------------------- | --------------------------------------------- | --------- | ------- | -------- | ------ | +| `braker_save_outputs` | Save BRAKER files | `boolean` | | | | +| `add_attrs_to_proteins_cds_fastas` | Add gff attributes to proteins/cDNA/CDS fasta | `boolean` | | | | ## Evaluation options diff --git a/nextflow.config b/nextflow.config index 665e6b3..363f0c5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -57,7 +57,7 @@ params { // Annotation output options braker_save_outputs = false - add_attrs_to_proteins_fasta = false + add_attrs_to_proteins_cds_fastas = false // Evaluation options busco_skip = false @@ -261,7 +261,7 @@ manifest { description = """A Nextflow pipeline for consensus, phased and pan-genome 
annotation.""" mainScript = 'main.nf' nextflowVersion = '!>=24.04.2' - version = '0.5.0' + version = '0.6.0' doi = 'https://doi.org/10.5281/zenodo.14195006' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 0258683..b7b5cc4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -287,10 +287,10 @@ "description": "Save BRAKER files", "fa_icon": "fas fa-question-circle" }, - "add_attrs_to_proteins_fasta": { + "add_attrs_to_proteins_cds_fastas": { "type": "boolean", - "fa_icon": "fas fa-question-circle", - "description": "Add gff attributes to proteins fasta" + "description": "Add gff attributes to proteins/cDNA/CDS fasta", + "fa_icon": "fas fa-question-circle" } } }, diff --git a/subworkflows/local/gff_store.nf b/subworkflows/local/gff_store.nf index 2c877a9..2ce008f 100644 --- a/subworkflows/local/gff_store.nf +++ b/subworkflows/local/gff_store.nf @@ -2,6 +2,8 @@ import java.net.URLEncoder include { GT_GFF3 as FINAL_GFF_CHECK } from '../../modules/nf-core/gt/gff3/main' include { GFFREAD as EXTRACT_PROTEINS } from '../../modules/nf-core/gffread/main' +include { GFFREAD as EXTRACT_CDS } from '../../modules/nf-core/gffread/main' +include { GFFREAD as EXTRACT_CDNA } from '../../modules/nf-core/gffread/main' workflow GFF_STORE { take: @@ -133,9 +135,34 @@ workflow GFF_STORE { ch_final_proteins = EXTRACT_PROTEINS.out.gffread_fasta ch_versions = ch_versions.mix(EXTRACT_PROTEINS.out.versions.first()) + // MODULE: GFFREAD as EXTRACT_CDS + ch_cds_extraction_inputs = ch_final_gff + | join(ch_fasta) + + EXTRACT_CDS( + ch_cds_extraction_inputs.map { meta, gff, fasta -> [ meta, gff ] }, + ch_cds_extraction_inputs.map { meta, gff, fasta -> fasta } + ) + + ch_final_cds = EXTRACT_CDS.out.gffread_fasta + ch_versions = ch_versions.mix(EXTRACT_CDS.out.versions.first()) + + // MODULE: GFFREAD as EXTRACT_CDNA + ch_cdna_extraction_inputs = ch_final_gff + | join(ch_fasta) + + EXTRACT_CDNA( + ch_cdna_extraction_inputs.map { meta, gff, fasta -> [ meta, gff ] }, + 
ch_cdna_extraction_inputs.map { meta, gff, fasta -> fasta} + ) + + ch_final_cdna = EXTRACT_CDNA.out.gffread_fasta + ch_versions = ch_versions.mix(EXTRACT_CDNA.out.versions.first()) emit: final_gff = ch_final_gff // [ meta, gff ] final_proteins = ch_final_proteins // [ meta, fasta ] + final_cds = ch_final_cds // [ meta, fasta ] + final_cdna = ch_final_cdna // [ meta, fasta ] versions = ch_versions // [ versions.yml ] } diff --git a/tests/minimal/main.nf.test.snap b/tests/minimal/main.nf.test.snap index 02c228f..b1f61c8 100644 --- a/tests/minimal/main.nf.test.snap +++ b/tests/minimal/main.nf.test.snap @@ -2,7 +2,7 @@ "profile - test": { "content": [ { - "successful tasks": 18, + "successful tasks": 20, "versions": { "AGAT_CONVERTSPGFF2GTF": { "agat": "v1.4.0" @@ -25,6 +25,12 @@ "CAT_PROTEIN_FASTAS": { "pigz": "2.3.4" }, + "EXTRACT_CDNA": { + "gffread": "0.12.7" + }, + "EXTRACT_CDS": { + "gffread": "0.12.7" + }, "EXTRACT_PROTEINS": { "gffread": "0.12.7" }, @@ -55,10 +61,12 @@ "tsebra": "1.1.2.5" }, "Workflow": { - "plant-food-research-open/genepal": "v0.5.0" + "plant-food-research-open/genepal": "v0.6.0" } }, "stable paths": [ + "a_thaliana.cdna.fasta:md5,12b9bef973e488640aec8c04ba3882fe", + "a_thaliana.cds.fasta:md5,b81060419355a590560f92aec8536281", "a_thaliana.gt.gff3:md5,8ab16549095f605ff8715ac4a3de58ed", "a_thaliana.pep.fasta:md5,4994c0393ca0245a1c57966d846d101e", "a_thaliana.gff3:md5,d23d16cd86499d48a30ffb981ed27891", @@ -67,6 +75,8 @@ "stable names": [ "annotations", "annotations/a_thaliana", + "annotations/a_thaliana/a_thaliana.cdna.fasta", + "annotations/a_thaliana/a_thaliana.cds.fasta", "annotations/a_thaliana/a_thaliana.gt.gff3", "annotations/a_thaliana/a_thaliana.pep.fasta", "etc", @@ -81,9 +91,9 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.04.2" }, - "timestamp": "2024-11-19T11:35:02.477202" + "timestamp": "2024-12-05T07:51:43.818374" } -} +} \ No newline at end of file diff --git 
a/tests/short/main.nf.test.snap b/tests/short/main.nf.test.snap index 62a4ed3..46055d1 100644 --- a/tests/short/main.nf.test.snap +++ b/tests/short/main.nf.test.snap @@ -5,11 +5,11 @@ "successful tasks": 0, "versions": { "Workflow": { - "plant-food-research-open/genepal": "v0.5.0" + "plant-food-research-open/genepal": "v0.6.0" } }, "stable paths": [ - + ] } ], @@ -17,6 +17,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-22T11:39:43.110621" + "timestamp": "2024-12-05T16:37:07.37961" } -} +} \ No newline at end of file diff --git a/tests/stub/main.nf.test.snap b/tests/stub/main.nf.test.snap index 524ce21..3d3949b 100644 --- a/tests/stub/main.nf.test.snap +++ b/tests/stub/main.nf.test.snap @@ -2,7 +2,7 @@ "full - stub": { "content": [ { - "successful tasks": 154, + "successful tasks": 162, "versions": { "AGAT_CONVERTSPGFF2GTF": { "agat": "v1.4.0" @@ -55,6 +55,12 @@ "EGGNOGMAPPER": { "eggnog-mapper": "2.1.12" }, + "EXTRACT_CDNA": { + "gffread": "0.12.7" + }, + "EXTRACT_CDS": { + "gffread": "0.12.7" + }, "EXTRACT_PROTEINS": { "gffread": "0.12.7" }, @@ -143,25 +149,33 @@ "tsebra": "1.1.2.5" }, "Workflow": { - "plant-food-research-open/genepal": "v0.5.0" + "plant-food-research-open/genepal": "v0.6.0" } }, "stable paths": [ + "donghong.cdna.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "donghong.cds.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", "donghong.emapper.annotations:md5,d41d8cd98f00b204e9800998ecf8427e", "donghong.emapper.hits:md5,d41d8cd98f00b204e9800998ecf8427e", "donghong.emapper.seed_orthologs:md5,d41d8cd98f00b204e9800998ecf8427e", "donghong.gt.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", "donghong.pep.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "red5_v2p1.cdna.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "red5_v2p1.cds.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v2p1.emapper.annotations:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v2p1.emapper.hits:md5,d41d8cd98f00b204e9800998ecf8427e", 
"red5_v2p1.emapper.seed_orthologs:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v2p1.gt.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v2p1.pep.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "red5_v3.cdna.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "red5_v3.cds.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v3.emapper.annotations:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v3.emapper.hits:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v3.emapper.seed_orthologs:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v3.gt.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", "red5_v3.pep.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "red7_v5.cdna.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "red7_v5.cds.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", "red7_v5.emapper.annotations:md5,d41d8cd98f00b204e9800998ecf8427e", "red7_v5.emapper.hits:md5,d41d8cd98f00b204e9800998ecf8427e", "red7_v5.emapper.seed_orthologs:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -188,9 +202,9 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.04.2" }, - "timestamp": "2024-11-21T12:34:14.056074" + "timestamp": "2024-12-05T07:56:38.915238" } -} +} \ No newline at end of file