diff --git a/CHANGELOG.md b/CHANGELOG.md index b938f472a..f888dc21f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,29 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## nf-core/ampliseq version 2.11.0 - 2024-08-06 + +### `Added` + +- [#765](https://github.com/nf-core/ampliseq/pull/765) - Added version R09-RS220 of curated GTDB 16S taxonomy: `sbdi-gtdb=R09-RS220-1` or `sbdi-gtdb` as parameter to `--dada_ref_taxonomy` +- [#766](https://github.com/nf-core/ampliseq/pull/766) - Added version 10 of Unite as parameter for `--sintax_ref_taxonomy`: `unite-fungi=10.0` and `unite-alleuk=10.0` + +### `Changed` + +- [#762](https://github.com/nf-core/ampliseq/pull/762) - Improved output documentation section "Optional ASV filtering" and parameter documentation +- [#766](https://github.com/nf-core/ampliseq/pull/766) - Modified warning filenames from `QIIME2_ANCOM` to avoid collisions +- [#766](https://github.com/nf-core/ampliseq/pull/766),[#769](https://github.com/nf-core/ampliseq/pull/769) - Disabled Unite databases from the `--qiime_ref_taxonomy` because of divergent results compared to the other classifiers + +### `Fixed` + +- [#761](https://github.com/nf-core/ampliseq/pull/761) - Some sample sheet checks were not applied due to changes in the metadata ["meta"] structure in version 2.9.0 +- [#766](https://github.com/nf-core/ampliseq/pull/766) - Fixed broken urls for Unite databases (issue [#764](https://github.com/nf-core/ampliseq/issues/764)) +- [#769](https://github.com/nf-core/ampliseq/pull/769) - Reference taxonomy database values were not properly validated in versions 2.9.0 and 2.10.0 + +### `Dependencies` + +### `Removed` + ## nf-core/ampliseq version 2.10.0 - 2024-06-27 ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 0a63581fb..576917076 100644 --- a/assets/multiqc_config.yml +++ 
b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/ampliseq + This report has been generated by the nf-core/ampliseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-ampliseq-methods-description": order: -1000 diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index a1bc6b4b8..761197397 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -1544,7 +1544,7 @@ for (folder in ancom) { any_ancombc <- !isFALSE(params$ancombc) || !isFALSE(params$ancombc_formula) ``` -```{r, eval = !isFALSE(params$any_ancombc), results='asis'} +```{r, eval = !isFALSE(any_ancombc), results='asis'} cat(paste0(" ## ANCOM-BC diff --git a/bin/taxref_reformat_sintax.sh b/bin/taxref_reformat_sintax_fasta.sh similarity index 99% rename from bin/taxref_reformat_sintax.sh rename to bin/taxref_reformat_sintax_fasta.sh index b076d0af0..30e63fca7 100755 --- a/bin/taxref_reformat_sintax.sh +++ b/bin/taxref_reformat_sintax_fasta.sh @@ -5,4 +5,3 @@ # Just rename the preformatted file # Assumes only one (gzipped) file mv * sintaxdb.fa.gz - diff --git a/bin/taxref_reformat_sintax_tar.sh b/bin/taxref_reformat_sintax_tar.sh new file mode 100755 index 000000000..f8a3e4230 --- /dev/null +++ b/bin/taxref_reformat_sintax_tar.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +# Handles preformatted database tar files suitable for sintax +# +# This turned out to be a MISTAKE and is NOT USED, but I'm keeping the file for a while anyway. 
+ +# Extract only the fasta member without _dev in its name (assumes exactly one .tgz and one match) +f=$(tar tfz *.tgz | grep fasta | grep -v '_dev') +tar xzf *.tgz "$f" + +# Change the name and gzip +mv "$f" sintaxdb.fa +gzip sintaxdb.fa diff --git a/conf/ref_databases.config b/conf/ref_databases.config index c2db0ed21..cd2edf43e 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -130,11 +130,19 @@ params { dbversion = "RDP 18/11.5 (https://zenodo.org/record/4310151/)" } 'sbdi-gtdb' { - title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R08-RS214-1" - file = [ "https://figshare.scilifelab.se/ndownloader/files/45818841", "https://figshare.scilifelab.se/ndownloader/files/45818850" ] + title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R09-RS220-1" + file = [ "https://figshare.scilifelab.se/ndownloader/files/47244061", "https://figshare.scilifelab.se/ndownloader/files/47244076" ] citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. doi: 10.17044/scilifelab.14869077.v6" fmtscript = "taxref_reformat_sbdi-gtdb.sh" - dbversion = "SBDI-GTDB-R08-RS214-1 (https://figshare.scilifelab.se/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/6)" + dbversion = "SBDI-GTDB-R09-RS220-1 (https://figshare.scilifelab.se/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/7)" + taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species" + } + 'sbdi-gtdb=R09-RS220-1' { + title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R09-RS220-1" + file = [ "https://figshare.scilifelab.se/ndownloader/files/47244061", "https://figshare.scilifelab.se/ndownloader/files/47244076" ] + citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. 
doi: 10.17044/scilifelab.14869077.v6" + fmtscript = "taxref_reformat_sbdi-gtdb.sh" + dbversion = "SBDI-GTDB-R09-RS220-1 (https://figshare.scilifelab.se/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/7)" taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species" } 'sbdi-gtdb=R08-RS214-1' { @@ -192,64 +200,64 @@ params { } 'unite-fungi' { title = "UNITE general FASTA release for Fungi - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/1E/25/1E25CA4CC30A31C2E2B8CB2C89824C83D080A7F5A62E6263A0E95B37C6628067.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE general FASTA release for Fungi. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483911" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/fa1038da-d18d-46b7-88a9-c21bcf38c43d.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE general FASTA release for Fungi. Version 18.07.2023. UNITE Community. https://doi.org/10.15156/BIO/2938067" fmtscript = "taxref_reformat_unite.sh" - dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2483911)" + dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2938067)" shfile = [ "https://figshare.scilifelab.se/ndownloader/files/40788767", "https://figshare.scilifelab.se/ndownloader/files/40788770"] } 'unite-fungi=9.0' { title = "UNITE general FASTA release for Fungi - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/1E/25/1E25CA4CC30A31C2E2B8CB2C89824C83D080A7F5A62E6263A0E95B37C6628067.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE general FASTA release for Fungi. Version 16.10.2022. UNITE Community. 
https://doi.org/10.15156/BIO/2483911" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/fa1038da-d18d-46b7-88a9-c21bcf38c43d.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE general FASTA release for Fungi. Version 18.07.2023. UNITE Community. https://doi.org/10.15156/BIO/2938067" fmtscript = "taxref_reformat_unite.sh" - dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2483911)" + dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2938067)" shfile = [ "https://figshare.scilifelab.se/ndownloader/files/40788767", "https://figshare.scilifelab.se/ndownloader/files/40788770"] } 'unite-fungi=8.3' { title = "UNITE general FASTA release for Fungi - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/7B/23/7B235835FAF5C85D7B01E40FEF17F687914CB81A182554C5BD95E3168328E604.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE general FASTA release for Fungi. UNITE Community. 10.15156/BIO/1280049" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/a0d487ac-1339-4bea-a0e9-8627f87a2f88.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE general FASTA release for Fungi. Version 10.05.2021. UNITE Community. 
https://doi.org/10.15156/BIO/1280049" fmtscript = "taxref_reformat_unite.sh" dbversion = "UNITE-fungi v8.3 (https://doi.org/10.15156/BIO/1280049)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34497977", "https://scilifelab.figshare.com/ndownloader/files/34497980"] } 'unite-fungi=8.2' { title = "UNITE general FASTA release for Fungi - Version 8.2" - file = [ "https://files.plutof.ut.ee/public/orig/E7/28/E728E2CAB797C90A01CD271118F574B8B7D0DAEAB7E81193EB89A2AC769A0896.gz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE general FASTA release for Fungi. UNITE Community. 10.15156/BIO/786368" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/6fdb458a-9299-41dc-8774-5152b867d882.gz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE general FASTA release for Fungi. Version 04.02.2020. UNITE Community. https://doi.org/10.15156/BIO/786368" fmtscript = "taxref_reformat_unite.sh" - dbversion = "UNITE-fungi v8.2 (https://doi.org/10.15156/BIO/786368)" + dbversion = "UNITE-fungi v8.2 (https://doi.plutof.ut.ee/doi/10.15156/BIO/786368)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34497971", "https://scilifelab.figshare.com/ndownloader/files/34497974"] } 'unite-alleuk' { title = "UNITE general FASTA release for eukaryotes - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/F2/62/F262D942DEB8CAB3AEB9F313F67B04050E364B72E6707F99755DDCB271C45A48.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE general FASTA release for eukaryotes. Version 16.10.2022. UNITE Community. 
https://doi.org/10.15156/BIO/2483913" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/e318f5fd-1ef4-40fd-9e77-1b94d91b3858.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE general FASTA release for eukaryotes. Version 18.07.2023. UNITE Community. https://doi.org/10.15156/BIO/2938069" fmtscript = "taxref_reformat_unite.sh" - dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2483913)" + dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2938069)" shfile = [ "https://figshare.scilifelab.se/ndownloader/files/40788773", "https://figshare.scilifelab.se/ndownloader/files/40788776"] } 'unite-alleuk=9.0' { title = "UNITE general FASTA release for eukaryotes - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/F2/62/F262D942DEB8CAB3AEB9F313F67B04050E364B72E6707F99755DDCB271C45A48.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE general FASTA release for eukaryotes. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483913" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/e318f5fd-1ef4-40fd-9e77-1b94d91b3858.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE general FASTA release for eukaryotes. Version 18.07.2023. UNITE Community. 
https://doi.org/10.15156/BIO/2938069" fmtscript = "taxref_reformat_unite.sh" - dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2483913)" + dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2938069)" shfile = [ "https://figshare.scilifelab.se/ndownloader/files/40788773", "https://figshare.scilifelab.se/ndownloader/files/40788776"] } 'unite-alleuk=8.3' { title = "UNITE general FASTA release for eukaryotes - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/E5/F5/E5F5E426DEC78BA2F7EC530621DDBD3F10564A09CBC2A5C4D3B3CBE7E37C5E1A.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE general FASTA release for eukaryotes. UNITE Community. 10.15156/BIO/1280127" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/6e72e839-acfc-4044-84b4-5152b74c1552.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE general FASTA release for eukaryotes. Version 10.05.2021. UNITE Community. https://doi.org/10.15156/BIO/1280127" fmtscript = "taxref_reformat_unite.sh" dbversion = "UNITE-alleuk v8.3 (https://doi.org/10.15156/BIO/1280127)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994575", "https://scilifelab.figshare.com/ndownloader/files/34994578"] } 'unite-alleuk=8.2' { title = "UNITE general FASTA release for eukaryotes - Version 8.2" - file = [ "https://files.plutof.ut.ee/public/orig/F9/ED/F9EDE36E5209F469056675EBD672425BC06EACB7FE0C0D18F5A13E4CA632DCFA.gz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE general FASTA release for eukaryotes. UNITE Community. 
10.15156/BIO/786370" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/dbd892b6-0446-4351-988e-922c4b73585e.gz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE general FASTA release for eukaryotes. Version 04.02.2020. UNITE Community. https://doi.org/10.15156/BIO/786370" fmtscript = "taxref_reformat_unite.sh" dbversion = "UNITE-alleuk v8.2 (https://doi.org/10.15156/BIO/786370)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994569", "https://scilifelab.figshare.com/ndownloader/files/34994572"] @@ -288,49 +296,74 @@ params { license = "https://www.arb-silva.de/silva-license-information/" fmtscript = "taxref_reformat_qiime_silva138.sh" } + + // We have disabled all Unite databases for the QIIME2 classifiers + // since we get results that differ considerably from the Sintax and + // DADA2 classifiers with the corresponding databases. + + /*** //UNITE for QIIME2, see https://unite.ut.ee/repository.php 'unite-fungi' { - title = "UNITE QIIME release for Fungi - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/C5/54/C5547B97AAA979E45F79DC4C8C4B12113389343D7588716B5AD330F8BDB300C9.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE QIIME release for Fungi. Version 10.05.2021. UNITE Community. https://doi.org/10.15156/BIO/1264708" + title = "UNITE QIIME release for Fungi - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/db1d6ddb-a35d-48c5-8b1a-ad9dd3310c6d.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE QIIME release for Fungi. Version 04.04.2024. UNITE Community. 
https://doi.org/10.15156/BIO/2959336" + fmtscript = "taxref_reformat_qiime_unite.sh" + } + 'unite-fungi=10.0' { + title = "UNITE QIIME release for Fungi - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/db1d6ddb-a35d-48c5-8b1a-ad9dd3310c6d.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE QIIME release for Fungi. Version 04.04.2024. UNITE Community. https://doi.org/10.15156/BIO/2959336" + fmtscript = "taxref_reformat_qiime_unite.sh" + } + 'unite-fungi=9.0' { + title = "UNITE QIIME release for Fungi - Version 9.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/3c22fbc5-ed9e-47a6-a85b-6c81268657e9.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE QIIME release for Fungi. Version 18.07.2023. UNITE Community. https://doi.org/10.15156/BIO/2938079" fmtscript = "taxref_reformat_qiime_unite.sh" } 'unite-fungi=8.3' { title = "UNITE QIIME release for Fungi - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/C5/54/C5547B97AAA979E45F79DC4C8C4B12113389343D7588716B5AD330F8BDB300C9.tgz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/3652451d-7567-4871-a7aa-3ba6c63aa60b.tgz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE QIIME release for Fungi. Version 10.05.2021. UNITE Community. 
https://doi.org/10.15156/BIO/1264708" fmtscript = "taxref_reformat_qiime_unite.sh" } 'unite-fungi=8.2' { title = "UNITE QIIME release for Fungi - Version 8.2" - file = [ "https://files.plutof.ut.ee/public/orig/98/AE/98AE96C6593FC9C52D1C46B96C2D9064291F4DBA625EF189FEC1CCAFCF4A1691.gz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/6b4e0baf-fc68-4e0e-8e4a-f5cf2d68bf98.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE QIIME release for Fungi. Version 04.02.2020. UNITE Community. https://doi.org/10.15156/BIO/786385" fmtscript = "taxref_reformat_qiime_unite.sh" } 'unite-alleuk' { - title = "UNITE QIIME release for eukaryotes - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/8F/FC/8FFCC8A730E50FEEF8CFFEEFEF02A22FBCF7E02B7FD31C6649754834D2CB0E6F.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE QIIME release for eukaryotes. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483917" + title = "UNITE QIIME release for eukaryotes - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/711a503c-589d-4138-a76f-6a8b89460ecf.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE QIIME release for eukaryotes. Version 04.04.2024. UNITE Community. https://doi.org/10.15156/BIO/2959338" + fmtscript = "taxref_reformat_qiime_unite.sh" + } + 'unite-alleuk=10.0' { + title = "UNITE QIIME release for eukaryotes - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/711a503c-589d-4138-a76f-6a8b89460ecf.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE QIIME release for eukaryotes. Version 04.04.2024. UNITE Community. 
https://doi.org/10.15156/BIO/2959338" fmtscript = "taxref_reformat_qiime_unite.sh" } 'unite-alleuk=9.0' { title = "UNITE QIIME release for eukaryotes - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/8F/FC/8FFCC8A730E50FEEF8CFFEEFEF02A22FBCF7E02B7FD31C6649754834D2CB0E6F.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE QIIME release for eukaryotes. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483917" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/ecc2ed00-1219-42c7-a951-fbe92b8cb733.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE QIIME release for eukaryotes. Version 18.07.2023. UNITE Community. https://doi.org/10.15156/BIO/2938081" fmtscript = "taxref_reformat_qiime_unite.sh" } 'unite-alleuk=8.3' { title = "UNITE QIIME release for eukaryotes - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/48/29/4829D91F763E20F0F4376A60AA53FC9FBE6029A7D1BDC1B45347DD64EDE5D560.tgz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/93683063-613d-4190-877e-26a57a196fc7.tgz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE QIIME release for eukaryotes. Version 10.05.2021. UNITE Community. https://doi.org/10.15156/BIO/1264819" fmtscript = "taxref_reformat_qiime_unite.sh" } 'unite-alleuk=8.2' { title = "UNITE QIIME release for eukaryotes - Version 8.2" - file = [ "https://files.plutof.ut.ee/public/orig/6E/0E/6E0EDD5592003B47C70A1B384C3C784AA32B726AC861CD7E2BD22AEB0278675E.gz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/f53a2174-9fc5-4d9f-8230-59f8772c6c5c.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. 
Henrik; Kõljalg, Urmas (2020): UNITE QIIME release for eukaryotes. Version 04.02.2020. UNITE Community. https://doi.org/10.15156/BIO/786386" fmtscript = "taxref_reformat_qiime_unite.sh" } + ***/ 'greengenes85' { title = "Greengenes 16S - Version 13_8 - clustered at 85% similarity - for testing purposes only" file = [ "https://data.qiime2.org/2023.7/tutorials/training-feature-classifiers/85_otus.fasta", "https://data.qiime2.org/2023.7/tutorials/training-feature-classifiers/85_otu_taxonomy.txt" ] @@ -356,7 +389,7 @@ params { title = "COIDB - CO1 Taxonomy Database - Release 221216" file = [ "https://figshare.scilifelab.se/ndownloader/files/38787078" ] citation = "Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. doi: https://doi.org/10.17044/scilifelab.20514192.v2" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)" taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus,Species,BOLD_bin" } @@ -364,64 +397,78 @@ params { title = "COIDB - CO1 Taxonomy Database - Release 221216" file = [ "https://figshare.scilifelab.se/ndownloader/files/38787078" ] citation = "Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. doi: https://doi.org/10.17044/scilifelab.20514192.v2" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)" taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus,Species,BOLD_bin" } 'unite-fungi' { - title = "UNITE USEARCH/UTAX release for Fungi - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/19/1B/191B0D889A6B7B05DF4C103B118ABB3E0CF8EDBEA5B3E3FAB3EAFE3B72D7F3C8.gz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. 
Henrik; Kõljalg, Urmas (2022): UNITE USEARCH/UTAX release for Fungi. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483923" - fmtscript = "taxref_reformat_sintax.sh" - dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2483923)" + title = "UNITE USEARCH/UTAX release for Fungi - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/b27cffec-1e7d-4584-93d3-12add9fa180b.gz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE USEARCH/UTAX release for Fungi. UNITE Community. 10.15156/BIO/2959340" + fmtscript = "taxref_reformat_sintax_fasta.sh" + dbversion = "UNITE-fungi v10.0 (https://doi.plutof.ut.ee/doi/10.15156/BIO/2959340)" + } + 'unite-fungi=10.0' { + title = "UNITE USEARCH/UTAX release for Fungi - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/b27cffec-1e7d-4584-93d3-12add9fa180b.gz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE USEARCH/UTAX release for Fungi. UNITE Community. 10.15156/BIO/2959340" + fmtscript = "taxref_reformat_sintax_fasta.sh" + dbversion = "UNITE-fungi v10.0 (https://doi.plutof.ut.ee/doi/10.15156/BIO/2959340)" } 'unite-fungi=9.0' { title = "UNITE USEARCH/UTAX release for Fungi - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/19/1B/191B0D889A6B7B05DF4C103B118ABB3E0CF8EDBEA5B3E3FAB3EAFE3B72D7F3C8.gz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/096d35b9-1d41-42bc-a0a6-dc4f4f17cc79.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE USEARCH/UTAX release for Fungi. Version 16.10.2022. UNITE Community. 
https://doi.org/10.15156/BIO/2483923" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2483923)" } 'unite-fungi=8.3' { title = "UNITE USEARCH/UTAX release for Fungi - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/82/CB/82CB44BBAAA7D3AEAC297B5689BDA2963E8D0666E01FE0B54096147AFAF85263.gz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/5f5085d1-ff89-4e9d-9c85-12165dcbf880.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE USEARCH/UTAX release for Fungi. UNITE Community. 10.15156/BIO/1280276" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-fungi v8.3 (https://dx.doi.org/10.15156/BIO/1280276)" } 'unite-fungi=8.2' { title = "UNITE USEARCH/UTAX release for Fungi - Version 8.2" - file = [ "https://files.plutof.ut.ee/public/orig/E8/83/E883EB19E3EA7B64C1F652521301239831FAFE0BFF015C9E2B4786DC0976C0FC.gz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/a6c4d680-c99a-4252-ba67-748271c7d7f4.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE USEARCH/UTAX release for Fungi. UNITE Community. 10.15156/BIO/786375" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-fungi v8.2 (https://doi.org/10.15156/BIO/786375)" } 'unite-alleuk' { - title = "UNITE USEARCH/UTAX release for eukaryotes - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/AB/8C/AB8C119FC82CF5AFAFCB93CA4FFFF2B42A03CF1275DE23F60B887392E8FDEA21.gz"] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE USEARCH/UTAX release for eukaryotes. Version 16.10.2022. UNITE Community. 
https://doi.org/10.15156/BIO/2483924" - fmtscript = "taxref_reformat_sintax.sh" - dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2483924)" + title = "UNITE USEARCH/UTAX release for eukaryotes - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/6f19ddb6-1ac0-4834-a74c-b639688878a4.gz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE USEARCH/UTAX release for eukaryotes. Version 04.04.2024. UNITE Community. https://doi.org/10.15156/BIO/2959341" + fmtscript = "taxref_reformat_sintax_fasta.sh" + dbversion = "UNITE-alleuk v10.0 (https://doi.org/10.15156/BIO/2959341)" + } + 'unite-alleuk=10.0' { + title = "UNITE USEARCH/UTAX release for eukaryotes - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/6f19ddb6-1ac0-4834-a74c-b639688878a4.gz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE USEARCH/UTAX release for eukaryotes. Version 04.04.2024. UNITE Community. https://doi.org/10.15156/BIO/2959341" + fmtscript = "taxref_reformat_sintax_fasta.sh" + dbversion = "UNITE-alleuk v10.0 (https://doi.org/10.15156/BIO/2959341)" } 'unite-alleuk=9.0' { title = "UNITE USEARCH/UTAX release for eukaryotes - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/AB/8C/AB8C119FC82CF5AFAFCB93CA4FFFF2B42A03CF1275DE23F60B887392E8FDEA21.gz"] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/00853437-b8b3-4d94-bcd8-7b942fcd8aa2.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE USEARCH/UTAX release for eukaryotes. Version 16.10.2022. UNITE Community. 
https://doi.org/10.15156/BIO/2483924" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2483924)" } 'unite-alleuk=8.3' { title = "UNITE USEARCH/UTAX release for eukaryotes - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/B9/35/B9351C91550A52713CB66DB7A1CEF35765310EBB23B6667AC93E714E9A9D020B.gz"] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/33ef3b65-5877-4bc9-8210-8e74f5ea63d7.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE USEARCH/UTAX release for eukaryotes. UNITE Community. 10.15156/BIO/1280317" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-alleuk v8.3 (https://doi.org/10.15156/BIO/1280127)" } 'unite-alleuk=8.2' { title = "UNITE USEARCH/UTAX release for eukaryotes - Version 8.2" - file = [ "https://files.plutof.ut.ee/public/orig/7B/B5/7BB51166C988E448392B213A72D4604E2ABB494E20E19E4392F7819FEBFCD036.gz"] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/fd6411cf-2c68-42e5-ae94-13ffb933a5c5.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE USEARCH/UTAX release for eukaryotes. UNITE Community. 
10.15156/BIO/786376" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-alleuk v8.2 (https://dx.doi.org/10.15156/BIO/786376)" } } diff --git a/conf/test_its_dada_taxonomy.config b/conf/test_its_dada_taxonomy.config new file mode 100644 index 000000000..d704320b2 --- /dev/null +++ b/conf/test_its_dada_taxonomy.config @@ -0,0 +1,50 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/ampliseq -profile test_its_dada_taxonomy,[docker/singularity] --outdir [OUTDIR] + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test ITS DADA2 taxonomy profile' + config_profile_description = 'Minimal test dataset to check pipeline function for ITS data with the DADA2 taxonomy classifier' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '12.GB' + max_time = '6.h' + + // Input data + FW_primer = "CTTGGTCATTTAGAGGAAGTAA" + RV_primer = "TCCTGAGGGAAACTTCG" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_pacbio_ITS.tsv" + metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Metadata_pacbio_ITS.tsv" + pacbio = true + max_ee = 12 + cut_its = "its2" + + skip_dada_taxonomy = false + dada_ref_taxonomy = "unite-fungi" + + //this is to remove low abundance ASVs to reduce runtime of downstream processes + min_samples = 2 + min_frequency = 10 + + //produce average barplots + metadata_category_barplot = "var2,var3" + + //restrict ANCOM analysis to higher taxonomic levels + tax_agglom_max = 4 + ancom = true + + sbdiexport = true + + qiime_adonis_formula = "var2" + + diversity_rarefaction_depth = 500 +} diff --git 
a/docs/output.md b/docs/output.md index 9c35b0a23..332787487 100644 --- a/docs/output.md +++ b/docs/output.md @@ -23,7 +23,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Cutadapt](#cutadapt) - Primer trimming - [MultiQC](#multiqc) - Aggregate report describing results - [ASV inferrence with DADA2](#asv-inferrence-with-dada2) - Infer Amplicon Sequence Variants (ASVs) -- [Optional ASV filtering](#optional-asv-filtering) - Filter ASVs to optimize downstream analysis +- [Optional ASV post processing](#optional-asv-post-processing) - Filter ASVs to optimize downstream analysis - [VSEARCH cluster](#vsearch-cluster) - Centroid fasta file, filtered asv table, and stats - [Barrnap](#barrnap) - Predict ribosomal RNA sequences and optional filtering - [Length filter](#length-filter) - Optionally, ASV can be filtered by length thresholds @@ -163,7 +163,9 @@ For binned quality scores in NovaSeq data, monotonicity in the fitted error mode -### Optional ASV filtering +### Optional ASV post processing + +ASV post-processing takes place after DADA2's ASV computation (i.e. after chimera removal) but _before_ taxonomic classification, so it also affects the taxonomic classification with DADA2 (e.g. table `ASV_tax.tsv`). Post-processing will affect all downstream files. Clustering and filters are applied sequentially, in the same sequence as shown here. All filters are off by default and can be enabled by setting thresholds as detailed in the parameter documentation. #### VSEARCH cluster @@ -184,7 +186,7 @@ This directory will hold the centroid fasta file, the filtered asv count table ( Barrnap predicts the location of ribosomal RNA genes in genomes, here it can be used to discriminate rRNA sequences from potential contamination. It supports bacteria (5S,23S,16S), archaea (5S,5.8S,23S,16S), metazoan mitochondria (12S,16S) and eukaryotes (5S,5.8S,28S,18S).
-Optionally, ASV sequences can be filtered for rRNA sequences identified by Barrnap with `--filter_ssu` that can take a list of abbreviations of the above supported categories (kingdoms), e.g. `bac,arc,mito,euk`. This filtering takes place after DADA2's ASV computation (i.e. after chimera removal) but _before_ taxonomic classification (also applies to above mentioned taxonomic classification with DADA2, i.e. files `ASV_tax.tsv` & `ASV_tax_species.tsv`). +Optionally, ASV sequences can be filtered for rRNA sequences identified by Barrnap with `--filter_ssu` that can take a list of abbreviations of the above supported categories (kingdoms), e.g. `bac,arc,mito,euk`.
Output files @@ -200,7 +202,7 @@ Optionally, ASV sequences can be filtered for rRNA sequences identified by Barrn #### Length filter -Optionally, a length filter can be used to reduce potential contamination after ASV computation. For example with 515f and 806r primers the majority of 16S rRNA amplicon sequences should have a length of 253 bp and amplicons vary significantely are likely spurious. +Optionally, a length filter can be used to reduce potential contamination after ASV computation. For example with 515f and 806r primers the majority of 16S rRNA amplicon sequences should have a length of 253 bp and amplicons that differ significantly from this are likely spurious. The minimum ASV length threshold can be set by `--min_len_asv` and the maximum length threshold with `--max_len_asv`. If no threshold is set, the filter (and output) is omitted. diff --git a/docs/usage.md b/docs/usage.md index a7dad2b6a..a53a925b3 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -229,8 +229,8 @@ Pre-configured reference taxonomy databases are: | greengenes | - | - | + | (+)² | 16S rRNA | | greengenes2 | - | - | - | + | 16S rRNA | | pr2 | + | - | - | - | 18S rRNA | -| unite-fungi | + | + | - | + | eukaryotic nuclear ribosomal ITS region | -| unite-alleuk | + | + | - | + | eukaryotic nuclear ribosomal ITS region | +| unite-fungi | + | + | - | - | eukaryotic nuclear ribosomal ITS region | +| unite-alleuk | + | + | - | - | eukaryotic nuclear ribosomal ITS region | | coidb | + | + | - | - | eukaryotic Cytochrome Oxidase I (COI) | | midori2-co1 | + | - | - | - | eukaryotic Cytochrome Oxidase I (COI) | | phytoref | + | - | - | - | eukaryotic plastid 16S rRNA | diff --git a/modules/local/qiime2_ancom_tax.nf b/modules/local/qiime2_ancom_tax.nf index 1e3d320bc..cf4882764 100644 --- a/modules/local/qiime2_ancom_tax.nf +++ b/modules/local/qiime2_ancom_tax.nf @@ -43,7 +43,7 @@ process QIIME2_ANCOM_TAX { --to-tsv if [ \$(grep -v '^#' -c ${table.baseName}-level-${taxlevel}.feature-table.tsv) 
-lt 2 ]; then - echo ${taxlevel} > ancom/\"WARNING Summing your data at taxonomic level ${taxlevel} produced less than two rows (taxa), ANCOM can't proceed -- did you specify a bad reference taxonomy?\".txt + echo ${taxlevel} > ancom/\"WARNING ${table.baseName} Summing your data at taxonomic level ${taxlevel} produced less than two rows (taxa), ANCOM can't proceed -- did you specify a bad reference taxonomy?\".txt else qiime composition add-pseudocount \\ --i-table lvl${taxlevel}-${table} \\ diff --git a/nextflow.config b/nextflow.config index 81124505e..f81c0033b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -284,21 +284,22 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_single { includeConfig 'conf/test_single.config' } - test_multi { includeConfig 'conf/test_multi.config' } - test_doubleprimers { includeConfig 'conf/test_doubleprimers.config' } - test_pacbio_its { includeConfig 'conf/test_pacbio_its.config' } - test_iontorrent { includeConfig 'conf/test_iontorrent.config' } - test_fasta { includeConfig 'conf/test_fasta.config' } - test_failed { includeConfig 'conf/test_failed.config' } - test_full { includeConfig 'conf/test_full.config' } - test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' } - test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' } - test_novaseq { includeConfig 'conf/test_novaseq.config' } - test_pplace { includeConfig 'conf/test_pplace.config' } - test_sintax { includeConfig 'conf/test_sintax.config' } - test_multiregion { includeConfig 'conf/test_multiregion.config' } + test { includeConfig 'conf/test.config' } + test_single { includeConfig 'conf/test_single.config' } + test_multi { includeConfig 'conf/test_multi.config' } + test_doubleprimers { includeConfig 'conf/test_doubleprimers.config' } + test_pacbio_its { includeConfig 'conf/test_pacbio_its.config' } + test_iontorrent { includeConfig 'conf/test_iontorrent.config' } + test_fasta { 
includeConfig 'conf/test_fasta.config' } + test_failed { includeConfig 'conf/test_failed.config' } + test_full { includeConfig 'conf/test_full.config' } + test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' } + test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' } + test_novaseq { includeConfig 'conf/test_novaseq.config' } + test_pplace { includeConfig 'conf/test_pplace.config' } + test_sintax { includeConfig 'conf/test_sintax.config' } + test_its_dada_taxonomy { includeConfig 'conf/test_its_dada_taxonomy.config' } + test_multiregion { includeConfig 'conf/test_multiregion.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -356,7 +357,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.10.0' + version = '2.11.0' doi = '10.5281/zenodo.1493841,10.3389/fmicb.2020.550420' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 1bba874bd..ccef5f95f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -150,6 +150,7 @@ "primer_removal": { "title": "Primer removal", "type": "object", + "description": "Spurious sequences sometimes lack primer sequences and primers introduce errors that can be removed in that step", "default": "", "properties": { "retain_untrimmed": { @@ -188,7 +189,7 @@ "read_trimming_and_quality_filtering": { "title": "Read trimming and quality filtering", "type": "object", - "description": "", + "description": "Read trimming and quality filtering is supposed to reduce spurious results and aid error correction", "default": "", "properties": { "trunclenf": { @@ -271,6 +272,7 @@ "asv_post_processing": { "title": "ASV post processing", "type": "object", + "description": "ASV post-processing takes place after ASV computation but before taxonomic assignment, it will affect all downstream processes", "default": "", "properties": { "vsearch_cluster": 
{ @@ -370,21 +372,22 @@ "rdp", "rdp=18", "sbdi-gtdb", - "sbdi-gtdb=R06-RS202-1", - "sbdi-gtdb=R06-RS202-3", - "sbdi-gtdb=R07-RS207-1", + "sbdi-gtdb=R09-RS220-1", "sbdi-gtdb=R08-RS214-1", + "sbdi-gtdb=R07-RS207-1", + "sbdi-gtdb=R06-RS202-3", + "sbdi-gtdb=R06-RS202-1", "silva", "silva=132", "silva=138", "unite-alleuk", - "unite-alleuk=8.2", - "unite-alleuk=8.3", "unite-alleuk=9.0", + "unite-alleuk=8.3", + "unite-alleuk=8.2", "unite-fungi", - "unite-fungi=8.2", - "unite-fungi=8.3", "unite-fungi=9.0", + "unite-fungi=8.3", + "unite-fungi=8.2", "zehr-nifh", "zehr-nifh=2.5.0" ] @@ -451,20 +454,7 @@ "type": "string", "help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . This will download the desired database and initiate taxonomic classification with QIIME2 and the chosen database.\n\nIf both, `--dada_ref_taxonomy` and `--qiime_ref_taxonomy` are used, DADA2 classification will be used for downstream analysis.\n\nThe following databases are supported:\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- Greengenes (only testing!)\n\nGenerally, using `silva`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version. For testing purposes, the tiny database `greengenes85` (dereplicated at 85% sequence similarity) is available. 
For details on what values are valid, please either use an invalid value such as `x` (causing the pipeline to send an error message with all valid values) or see `conf/ref_databases.config`.", "description": "Name of supported database, and optionally also version number", - "enum": [ - "silva=138", - "silva", - "unite-fungi=8.3", - "unite-fungi=8.2", - "unite-fungi", - "unite-alleuk=9.0", - "unite-alleuk=8.3", - "unite-alleuk=8.2", - "unite-alleuk", - "greengenes85", - "greengenes2", - "greengenes2=2022.10" - ] + "enum": ["silva=138", "silva", "greengenes85", "greengenes2", "greengenes2=2022.10"] }, "qiime_ref_tax_custom": { "type": "string", @@ -517,14 +507,16 @@ "enum": [ "coidb", "coidb=221216", + "unite-fungi", + "unite-fungi=10.0", "unite-fungi=9.0", "unite-fungi=8.3", "unite-fungi=8.2", - "unite-fungi", + "unite-alleuk", + "unite-alleuk=10.0", "unite-alleuk=9.0", "unite-alleuk=8.3", - "unite-alleuk=8.2", - "unite-alleuk" + "unite-alleuk=8.2" ] }, "addsh": { @@ -575,6 +567,7 @@ "title": "ASV filtering", "type": "object", "default": "", + "description": "Filtering by taxonomy or abundance will affect all downstream analysis", "fa_icon": "fas fa-filter", "properties": { "exclude_taxa": { @@ -600,7 +593,7 @@ "downstream_analysis": { "title": "Downstream analysis", "type": "object", - "description": "", + "description": "Metadata is used here to visualize data either for quality control or publication ready figures", "default": "", "fa_icon": "fas fa-bacteria", "properties": { @@ -652,7 +645,7 @@ "differential_abundance_analysis": { "title": "Differential abundance analysis", "type": "object", - "description": "", + "description": "Differential abundance analysis relies on provided metadata", "default": "", "fa_icon": "fas fa-bacteria", "properties": { @@ -705,7 +698,7 @@ "pipeline_report": { "title": "Pipeline summary report", "type": "object", - "description": "", + "description": "Customization of the pipeline report", "default": "", "properties": { 
"report_template": { diff --git a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf index 321463ae9..6180ebd2a 100644 --- a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf @@ -162,14 +162,6 @@ def validateInputParameters() { error("Invalid command: `--trunclenr` is set, but `--trunclenf` is not. Either both parameters `--trunclenf` and `--trunclenr` must be set or none.") } - if (!["pooled", "independent", "pseudo"].contains(params.sample_inference)) { - def error_string = "Please set `--sample_inference` to one of the following:\n" + - "\t-\"independent\" (lowest sensitivity and lowest resources),\n" + - "\t-\"pseudo\" (balance between required resources and sensitivity),\n" + - "\t-\"pooled\" (highest sensitivity and resources)." - error(error_string) - } - if (params.double_primer && params.retain_untrimmed) { error("Incompatible parameters `--double_primer` and `--retain_untrimmed` cannot be set at the same time.") } @@ -237,7 +229,17 @@ def validateInputParameters() { error("Incompatible parameters: `--filter_ssu` cannot be used with `--skip_barrnap` because filtering for SSU's depends on barrnap.") } - String[] sbdi_compatible_databases = ["coidb","coidb=221216","gtdb","gtdb=R09-RS220","gtdb=R08-RS214","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] + String[] sbdi_compatible_databases = [ + "coidb","coidb=221216", + "gtdb","gtdb=R09-RS220","gtdb=R08-RS214","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95", + "midori2-co1","midori2-co1=gb250", + "pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0", + 
"rdp","rdp=18", + "sbdi-gtdb","sbdi-gtdb=R09-RS220-1","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1", + "silva","silva=138","silva=132", + "unite-fungi","unite-fungi=10.0","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2", + "unite-alleuk","unite-alleuk=10.0","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2" + ] if (params.sbdiexport){ if (params.sintax_ref_taxonomy ) { if (!Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.sintax_ref_taxonomy.toString().equals(entry)) ) { diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index fa2211263..8d5afed33 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -47,7 +47,7 @@ if (params.dada_ref_tax_custom) { val_dada_ref_taxonomy = "user" } else if (params.dada_ref_taxonomy && !params.skip_dada_taxonomy && !params.skip_taxonomy) { //standard ref taxonomy input from params.dada_ref_taxonomy & conf/ref_databases.config - ch_dada_ref_taxonomy = Channel.fromList(params.dada_ref_databases[params.dada_ref_taxonomy]["file"]).map { file(it) } + ch_dada_ref_taxonomy = params.dada_ref_databases.containsKey(params.dada_ref_taxonomy) ? Channel.fromList(params.dada_ref_databases[params.dada_ref_taxonomy]["file"]).map { file(it) } : Channel.empty() val_dada_ref_taxonomy = params.dada_ref_taxonomy.replace('=','_').replace('.','_') } else { ch_dada_ref_taxonomy = Channel.empty() @@ -67,7 +67,7 @@ if (params.qiime_ref_tax_custom) { } val_qiime_ref_taxonomy = "user" } else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { - ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } + ch_qiime_ref_taxonomy = params.qiime_ref_databases.containsKey(params.qiime_ref_taxonomy) ? 
Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } : Channel.empty() val_qiime_ref_taxonomy = params.qiime_ref_taxonomy.replace('=','_').replace('.','_') } else { ch_qiime_ref_taxonomy = Channel.empty() @@ -75,7 +75,7 @@ if (params.qiime_ref_tax_custom) { } if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { - ch_sintax_ref_taxonomy = Channel.fromList(params.sintax_ref_databases[params.sintax_ref_taxonomy]["file"]).map { file(it) } + ch_sintax_ref_taxonomy = params.sintax_ref_databases.containsKey(params.sintax_ref_taxonomy) ? Channel.fromList(params.sintax_ref_databases[params.sintax_ref_taxonomy]["file"]).map { file(it) } : Channel.empty() val_sintax_ref_taxonomy = params.sintax_ref_taxonomy.replace('=','_').replace('.','_') } else { ch_sintax_ref_taxonomy = Channel.empty() @@ -88,7 +88,7 @@ if (params.kraken2_ref_tax_custom) { val_kraken2_ref_taxonomy = "user" } else if (params.kraken2_ref_taxonomy && !params.skip_taxonomy) { //standard ref taxonomy input from params.dada_ref_taxonomy & conf/ref_databases.config - ch_kraken2_ref_taxonomy = Channel.fromList(params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]).map { file(it) } + ch_kraken2_ref_taxonomy = params.kraken2_ref_databases.containsKey(params.kraken2_ref_taxonomy) ? Channel.fromList(params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]).map { file(it) } : Channel.empty() val_kraken2_ref_taxonomy = params.kraken2_ref_taxonomy.replace('=','_').replace('.','_') } else { ch_kraken2_ref_taxonomy = Channel.empty() @@ -122,16 +122,16 @@ tax_agglom_max = params.tax_agglom_max //use custom taxlevels from --dada_assign_taxlevels or database specific taxlevels if specified in conf/ref_databases.config if ( params.dada_ref_taxonomy ) { taxlevels = params.dada_assign_taxlevels ? 
"${params.dada_assign_taxlevels}" : - params.dada_ref_databases[params.dada_ref_taxonomy]["taxlevels"] ?: "" + params.dada_ref_databases.containsKey(params.dada_ref_taxonomy) && params.dada_ref_databases[params.dada_ref_taxonomy]["taxlevels"] ? params.dada_ref_databases[params.dada_ref_taxonomy]["taxlevels"] : "" } else { taxlevels = params.dada_assign_taxlevels ? "${params.dada_assign_taxlevels}" : "" } if ( params.sintax_ref_taxonomy ) { - sintax_taxlevels = params.sintax_ref_databases[params.sintax_ref_taxonomy]["taxlevels"] ?: "" + sintax_taxlevels = params.sintax_ref_databases.containsKey(params.sintax_ref_taxonomy) && params.sintax_ref_databases[params.sintax_ref_taxonomy]["taxlevels"] ? params.sintax_ref_databases[params.sintax_ref_taxonomy]["taxlevels"] : "" } else { sintax_taxlevels = "" } if ( params.kraken2_ref_taxonomy ) { kraken2_taxlevels = params.kraken2_assign_taxlevels ? "${params.kraken2_assign_taxlevels}" : - params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["taxlevels"] ?: "" + params.kraken2_ref_databases.containsKey(params.kraken2_ref_taxonomy) && params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["taxlevels"] ? params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["taxlevels"] : "" } else { kraken2_taxlevels = params.kraken2_assign_taxlevels ? "${params.kraken2_assign_taxlevels}" : "" } //make sure that taxlevels adheres to requirements when mixed with addSpecies @@ -268,13 +268,13 @@ workflow AMPLISEQ { ch_input_reads = Channel.empty() if ( params.input ) { // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - ch_input_reads = Channel.fromSamplesheet("input") + ch_input_reads = Channel.fromSamplesheet("input") // meta: meta.sample, meta.run .map{ meta, readfw, readrv -> meta.single_end = single_end.toBoolean() def reads = single_end ? 
readfw : [readfw,readrv] - if ( !meta.single_end && !readrv ) { error("Entry `reverseReads` is missing in $params.input for $meta.id, either correct the samplesheet or use `--single_end`, `--pacbio`, or `--iontorrent`") } // make sure that reverse reads are present when single_end isnt specified - if ( !meta.single_end && ( readfw.getSimpleName() == meta.id || readrv.getSimpleName() == meta.id ) ) { error("Entry `sampleID` cannot be identical to simple name of `forwardReads` or `reverseReads`, please change `sampleID` in $params.input for sample $meta.id") } // sample name and any file name without extensions arent identical, because rename_raw_data_files.nf would forward 3 files (2 renamed +1 input) instead of 2 in that case - if ( meta.single_end && ( readfw.getSimpleName() == meta.id+"_1" || readfw.getSimpleName() == meta.id+"_2" ) ) { error("Entry `sampleID`+ `_1` or `_2` cannot be identical to simple name of `forwardReads`, please change `sampleID` in $params.input for sample $meta.id") } // sample name and file name without extensions arent identical, because rename_raw_data_files.nf would forward 2 files (1 renamed +1 input) instead of 1 in that case + if ( !meta.single_end && !readrv ) { error("Entry `reverseReads` is missing in $params.input for $meta.sample, either correct the samplesheet or use `--single_end`, `--pacbio`, or `--iontorrent`") } // make sure that reverse reads are present when single_end isn't specified + if ( !meta.single_end && ( readfw.getSimpleName() == meta.sample || readrv.getSimpleName() == meta.sample ) ) { error("Entry `sampleID` cannot be identical to simple name of `forwardReads` or `reverseReads`, please change `sampleID` in $params.input for sample $meta.sample") } // sample name and any file name without extensions aren't identical, because rename_raw_data_files.nf would forward 3 files (2 renamed +1 input) instead of 2 in that case + if ( meta.single_end && ( readfw.getSimpleName() == meta.sample+"_1" || 
readfw.getSimpleName() == meta.sample+"_2" ) ) { error("Entry `sampleID`+ `_1` or `_2` cannot be identical to simple name of `forwardReads`, please change `sampleID` in $params.input for sample $meta.sample") } // sample name and file name without extensions aren't identical, because rename_raw_data_files.nf would forward 2 files (1 renamed +1 input) instead of 1 in that case return [meta, reads] } } else if ( params.input_fasta ) { ch_input_fasta = Channel.fromPath(params.input_fasta, checkIfExists: true)