diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..2951ad8 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,15 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space + +[*.{md,yml,yaml,cff}] +indent_size = 2 + +[*.nf.test] +insert_final_newline = false diff --git a/.gitignore b/.gitignore index cc3b658..62d31c0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,26 +1,16 @@ -.DS_Store - -*.pyc -__pycahce__ - -nextflow .nextflow* work/ -*.dot - -Results/ results/ -report/ -Report/ - -*.log -.nfs* - -*.sif +.DS_Store +*.code-workspace +.screenrc +.*.sw? +__pycache__ +*.pyo +*.pyc -pan_gene_slurm.sh *.stdout *.stderr .literature -.test \ No newline at end of file +pangene-test/ diff --git a/.nf-core.yml b/.nf-core.yml new file mode 100644 index 0000000..3805dc8 --- /dev/null +++ b/.nf-core.yml @@ -0,0 +1 @@ +repository_type: pipeline diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..fc52181 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.1.0" + hooks: + - id: prettier diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000..24a3687 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,19 @@ +includes/Maven_Pro/ + +# gitignore +.nextflow* +work/ +results/ +.DS_Store +*.code-workspace +.screenrc +.*.sw? +__pycache__ +*.pyo +*.pyc + +*.stdout +*.stderr + +.literature +pangene-test/ diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 0000000..c81f9a7 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/README.md b/README.md index 2eaf965..5efb30f 100644 --- a/README.md +++ b/README.md @@ -1,94 +1,85 @@ -# PAN-GENE +# PANGENE + A NextFlow pipeline for pan-genome annotation. 
## Pipeline Flowchart ```mermaid flowchart TD - ribo_db((ribo_db)) - SAMPLESHEET((samples)) - TE_LIBRARIES(("[te_libs]")) - TARGET_ASSEMBLIES(("[assemblies]")) - EXTERNAL_PROTEIN_SEQS(("[ext_prots]")) - - GUNZIP_PROT[GUNZIP] - GUNZIP_TE[GUNZIP] - SKIP_EDTA{Skip EDTA} - pend((dev)) - - TE_LIBRARIES --> GUNZIP_TE - GUNZIP_TE --> SKIP_EDTA - - TARGET_ASSEMBLIES --> GUNZIP - GUNZIP --> FASTA_VALIDATE - FASTA_VALIDATE --> FASTA_PERFORM_EDTA - FASTA_VALIDATE --> SKIP_EDTA - - SKIP_EDTA --> REPEATMASKER - FASTA_PERFORM_EDTA --> REPEATMASKER - REPEATMASKER --> STAR_GENOMEGENERATE - - SAMPLESHEET --> SAMPLESHEET_CHECK - SAMPLESHEET_CHECK --> |Technical replicates|CAT_FASTQ - CAT_FASTQ --> FASTQC - SAMPLESHEET_CHECK --> FASTQC - FASTQC --> FASTP - - ribo_db --> SORTMERNA - FASTP --> SORTMERNA - SORTMERNA --> STAR_ALIGN - STAR_GENOMEGENERATE --> STAR_ALIGN - STAR_ALIGN --> GROUP_BY_ASSEMBLY([Group by assembly]) - GROUP_BY_ASSEMBLY --> SAMTOOLS_CAT - SAMTOOLS_CAT --> |RNASeq bam|BRAKER3 - - REPEATMASKER --> BRAKER3 - - EXTERNAL_PROTEIN_SEQS --> GUNZIP_PROT - GUNZIP_PROT --> CAT - CAT --> BRAKER3 - - BRAKER3 --> pend - - subgraph Params + subgraph PrepareAssembly [ ] TARGET_ASSEMBLIES TE_LIBRARIES - SAMPLESHEET - ribo_db - EXTERNAL_PROTEIN_SEQS - end - - subgraph GenomePrep - GUNZIP FASTA_VALIDATE - GUNZIP_TE - FASTA_PERFORM_EDTA - SKIP_EDTA + fasta_file_from_fasta_validate + EDTA + REPEATMODELER + te_lib_absent_node REPEATMASKER - STAR_GENOMEGENERATE - end - - subgraph Braker - CAT - GUNZIP_PROT - BRAKER3 end - subgraph SamplePrep - SAMPLESHEET_CHECK + TARGET_ASSEMBLIES(["[target_assemblies]"]) + TE_LIBRARIES(["[te_libs]"]) + TARGET_ASSEMBLIES --> FASTA_VALIDATE + FASTA_VALIDATE --- |Fasta|fasta_file_from_fasta_validate(( )) + fasta_file_from_fasta_validate --> |or|EDTA + fasta_file_from_fasta_validate --> |default|REPEATMODELER + REPEATMODELER --- te_lib_absent_node(( )) + EDTA --- te_lib_absent_node + TE_LIBRARIES --> REPEATMASKER + te_lib_absent_node --> REPEATMASKER + + 
subgraph Samplesheet [ ] + SAMPLESHEET CAT_FASTQ FASTQC FASTP + FASTP_FASTQC SORTMERNA - STAR_ALIGN - GROUP_BY_ASSEMBLY + fasta_file_for_star + STAR SAMTOOLS_CAT end - style Params fill:#00FFFF21,stroke:#00FFFF21 - style GenomePrep fill:#00FFFF21,stroke:#00FFFF21 - style SamplePrep fill:#00FFFF21,stroke:#00FFFF21 - style Braker fill:#00FFFF21,stroke:#00FFFF21 + SAMPLESHEET([samplesheet]) + SAMPLESHEET --> |Tech. reps|CAT_FASTQ + CAT_FASTQ --> FASTQC + SAMPLESHEET --> FASTQC + FASTQC --> FASTP + FASTP --> FASTP_FASTQC[FASTQC] + FASTP_FASTQC --> SORTMERNA + fasta_file_for_star(( )) + fasta_file_for_star --> |Fasta|STAR + SORTMERNA --> STAR + STAR --> SAMTOOLS_CAT + + subgraph Annotation [ ] + anno_fasta(( )) + anno_masked_fasta(( )) + anno_bam(( )) + EXTERNAL_PROTEIN_SEQS(["[ext_prots]"]) + XREF_ANNOTATIONS(["[xref_annotations]"]) + CAT + BRAKER3 + GFFREAD + LIFTOFF + end + + PrepareAssembly --> |Fasta, Masked fasta|Annotation + Samplesheet --> |RNASeq bam|Annotation + + XREF_ANNOTATIONS --> |xref_gff|GFFREAD + XREF_ANNOTATIONS --> |xref_fasta|LIFTOFF + GFFREAD --> LIFTOFF + anno_fasta --> |Fasta|LIFTOFF + + EXTERNAL_PROTEIN_SEQS --> CAT + anno_masked_fasta --> |Masked fasta|BRAKER3 + anno_bam --> |RNASeq bam|BRAKER3 + CAT --> BRAKER3 + + style Samplesheet fill:#00FFFF21,stroke:#00FFFF21 + style PrepareAssembly fill:#00FFFF21,stroke:#00FFFF21 + style Annotation fill:#00FFFF21,stroke:#00FFFF21 ``` ## Plant&Food Users @@ -96,10 +87,9 @@ flowchart TD Configure the pipeline by modifying `nextflow.config` and submit to SLURM for execution. ```bash -sbatch ./pan_gene_pfr.sh +sbatch ./pangene_pfr ``` - ## Third-party Sources Some software components of this pipeline have been adopted from following third-party sources: @@ -112,5 +102,6 @@ Some software components of this pipeline have been adopted from following third > > _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). -2. 
rewarewaannotation [MIT](https://github.com/kherronism/rewarewaannotation/blob/master/LICENSE): https://github.com/kherronism/rewarewaannotation -3. assembly_qc [GPL-3.0](https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE): https://github.com/Plant-Food-Research-Open/assembly_qc \ No newline at end of file +2. nf-core/rnaseq [MIT](https://github.com/nf-core/rnaseq/blob/master/LICENSE): https://github.com/nf-core/rnaseq +3. rewarewaannotation [MIT](https://github.com/kherronism/rewarewaannotation/blob/master/LICENSE): https://github.com/kherronism/rewarewaannotation +4. assembly_qc [GPL-3.0](https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE): https://github.com/Plant-Food-Research-Open/assembly_qc diff --git a/TODO.md b/TODO.md index a9be74b..3742442 100644 --- a/TODO.md +++ b/TODO.md @@ -1,4 +1,21 @@ -- [ ] Rename perform_edta_annotation to FASTA_PERFORM_EDTA -- [ ] Extract subworkflows -- [ ] STAR ignores softmasking and, thus, should be fed the unmasked genome so that masking and mapping can run in parallel. -- [ ] Add --eval=reference.gtf \ No newline at end of file +- [ ] Add --eval=reference.gtf +- [ ] From Ross regarding post-processing: + +> [9:49 am] Ross Crowhurst +> Here is an easy one: BLATSp vs swissprot & Arabidpsis and check query is with set thresholds of reference - if so accept; If not move to BLASTp vs Uniref90 or Refeq (or some other predetermined model species) - same deal accept if within threshold limits. Else BLASTn of cds vs NCBI nt (really scrapping the bottom of the barrel here). If not a hit to anything then chances are its garbage and should be removed. Some ppl might try to claim its a unique protein to the genotype but in 20 years I have never seen one of those be supported - mostly this category is garbage. The screen agains NCBI nt also assists to classify "bits" as well retroposonss etc. 
Idea being you want to remove garbage predictions - as this does take time you can see why some papers just filter out by size. + +- [ ] From Cecilia: + +> https://github.com/zhaotao1987/SynNet-Pipeline + +- [ ] From Ross: + +> https://www.biorxiv.org/content/10.1101/096529v2.full.pdf + +- [ ] Sort out EDTA testing + +- Mib finder, eggnog, blastp against TAIR and uniprot (Wait) +- entap to merge (Wait) +- trinity and PASA + StringTie2 -> Evigene (Do) +- orthofinder paper +- gffcompare on braker and liftoff diff --git a/assets/rrna-db-defaults.txt b/assets/rrna-db-defaults.txt index e2bc4e6..4223356 100644 --- a/assets/rrna-db-defaults.txt +++ b/assets/rrna-db-defaults.txt @@ -5,4 +5,4 @@ https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/s https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-bac-16s-id90.fasta https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-bac-23s-id98.fasta https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-18s-id95.fasta -https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-28s-id98.fasta \ No newline at end of file +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-28s-id98.fasta diff --git a/assets/rrna-db-test.txt b/assets/rrna-db-test.txt new file mode 100644 index 0000000..20116f9 --- /dev/null +++ b/assets/rrna-db-test.txt @@ -0,0 +1 @@ +https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-28s-id98.fasta diff --git a/bin/make-samplesheet.py b/bin/make-samplesheet.py index bc39f55..b4ad0b7 100755 --- a/bin/make-samplesheet.py +++ b/bin/make-samplesheet.py @@ -282,4 +282,4 @@ def main(): make_samplesheet_from_command(input_path_or_command, exp_name) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/cleanNXF.sh b/cleanNXF.sh index c566dbf..8c64a3e 100755 --- 
a/cleanNXF.sh +++ b/cleanNXF.sh @@ -8,4 +8,4 @@ for i in $(ls work | grep -v "conda"); do rm -rf "work/$i" done -echo "Cleaned work..." \ No newline at end of file +echo "Cleaned work..." diff --git a/conf/base.config b/conf/base.config index 4cdec8d..6b0d419 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,19 +1,33 @@ profiles { - slurm { + pfr { process { executor = 'slurm' } + + apptainer { + envWhitelist = 'APPTAINER_BINDPATH,APPTAINER_BIND' + } } - + local { process { executor = 'local' } } + + apptainer { + apptainer.enabled = true + apptainer.autoMounts= true + apptainer.registry = 'quay.io' + } + + docker { + docker.enabled = true + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.registry = 'quay.io' + } } -// Source: https://github.com/nf-core/rnaseq -// License: https://github.com/nf-core/rnaseq/blob/master/LICENSE process { cpus = { check_max( 1 * task.attempt, 'cpus' ) } @@ -24,12 +38,6 @@ process { maxRetries = 1 maxErrors = '-1' - // Process-specific resource requirements - // NOTE - Please try and re-use the labels below as much as possible. - // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. - // If possible, it would be nice to keep the same label naming convention when - // adding in your local modules too. 
- // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } @@ -53,17 +61,13 @@ process { withLabel:process_long { time = { check_max( 20.h * task.attempt, 'time' ) } } - withLabel:process_week_long { - time = { check_max( 7.days * task.attempt, 'time' ) } - } withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } -} -singularity { - enabled = true - autoMounts = true + withName:CUSTOM_DUMPSOFTWAREVERSIONS { + cache = false + } } nextflow { @@ -72,8 +76,6 @@ nextflow { } } -// Source: https://github.com/nf-core/rnaseq -// License: https://github.com/nf-core/rnaseq/blob/master/LICENSE def check_max(obj, type) { if (type == 'memory') { try { diff --git a/conf/manifest.config b/conf/manifest.config new file mode 100644 index 0000000..95537cd --- /dev/null +++ b/conf/manifest.config @@ -0,0 +1,10 @@ +manifest { + name = 'pangene' + author = """Usman Rashid, Jason Shiller""" + homePage = 'https://github.com/PlantandFoodResearch/pan-gene' + description = """A NextFlow pipeline for pan-genome annotation""" + mainScript = 'main.nf' + nextflowVersion = '!>=23.04.4' + version = '0.2' + doi = '' +} diff --git a/conf/modules.config b/conf/modules.config index 66205cb..fc489bf 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,18 +1,27 @@ process { - withName: 'EDTA' { + withName: 'EDTA_EDTA' { ext.args = [ - params.edta.is_sensitive ? "--sensitive 1" : "--sensitive 0", + params.edta_is_sensitive ? 
"--sensitive 1" : "--sensitive 0", "--anno 0", "--force 1" ].join(' ').trim() + + publishDir = [ + path: { "${params.outdir}/edta" }, + mode: "copy", + pattern: '*.EDTA.TElib.fa', + enabled: params.save_annotated_te_lib + ] } - withName: 'RESTORE_EDTA_IDS' { + withName: 'REPEATMODELER_REPEATMODELER' { + ext.args = '-LTRStruct' + publishDir = [ - path: { "${params.outdir}/edta/${meta.id}" }, + path: { "${params.outdir}/repeatmodeler" }, mode: "copy", - saveAs: { filename -> filename.equals("versions.yml") ? null : filename }, - enabled: params.edta.save_outputs + pattern: '*.fa', + enabled: params.save_annotated_te_lib ] } @@ -21,19 +30,17 @@ process { "-no_is", "-xsmall", ].join(' ').trim() - + publishDir = [ path: { "${params.outdir}/repeatmasker" }, mode: "copy", saveAs: { filename -> filename.equals("versions.yml") ? null : filename }, - enabled: params.repeatmasker.save_outputs + enabled: params.repeatmasker_save_outputs ] } } -// https://github.com/nf-core/rnaseq -// MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE -if(!params.sample_prep.skip_fastqc) { +if(!params.skip_fastqc) { process { withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_RAW' { ext.args = '--quiet' @@ -50,17 +57,20 @@ if(!params.sample_prep.skip_fastqc) { } } -// https://github.com/nf-core/rnaseq -// MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE -if(!params.sample_prep.skip_fastp) { +if(!params.skip_fastp) { process { withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTP' { - ext.args = params.sample_prep.extra_fastp_args ?: '' + ext.args = params.extra_fastp_args ?: '' publishDir = [ [ - path: { "${params.outdir}/fastp" }, + path: { "${params.outdir}/fastp/html" }, mode: "copy", - pattern: "*.{json,html}" + pattern: "*.{html}" + ], + [ + path: { "${params.outdir}/fastp/json" }, + mode: "copy", + pattern: "*.{json}" ], [ path: { "${params.outdir}/fastp/log" }, @@ -71,14 +81,14 @@ if(!params.sample_prep.skip_fastp) { path: { "${params.outdir}/fastp" }, mode: "copy", pattern: 
"*.fastq.gz", - enabled: params.sample_prep.save_trimmed + enabled: params.save_trimmed ] ] } } } -if (params.sample_prep.remove_ribo_rna) { +if (params.remove_ribo_rna) { process { withName: 'SORTMERNA' { ext.args = '--num_alignments 1 -v' @@ -92,40 +102,44 @@ if (params.sample_prep.remove_ribo_rna) { path: { "${params.outdir}/sortmerna" }, mode: "copy", pattern: "*.fastq.gz", - enabled: params.sample_prep.save_non_ribo_reads + enabled: params.save_non_ribo_reads ] ] } } } -// https://github.com/kherronism/rewarewaannotation -// MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE process { - - // Changes: - // Introduced additional defaults withName: STAR_ALIGN { ext.args = [ "--outSAMstrandField intronMotif", "--outSAMtype BAM SortedByCoordinate", "--readFilesCommand gunzip -c", - "--alignIntronMax ${params.star_align.max_intron_length}", - params.star_align.extra_star_align_args ? params.star_align.extra_star_align_args.split("\\s(?=--)") : '' + "--alignIntronMax ${params.star_max_intron_length}", + params.star_align_extra_args ? params.star_align_extra_args.split("\\s(?=--)") : '' ].flatten().unique(false).join(' ').trim() ext.prefix = { "${meta.id}" } publishDir = [ path: { "${params.outdir}/star/alignment" }, mode: "copy", saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.star_align.save_outputs + enabled: params.star_save_outputs + ] + } + + withName: '.*:ALIGN_RNASEQ:SAMTOOLS_CAT' { + publishDir = [ + path: { "${params.outdir}/star/cat_bam" }, + mode: "copy", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_cat_bam ] } withName: BRAKER3 { ext.args = [ "--gff3", - params.braker.extra_braker_args ? params.braker.extra_braker_args.split("\\s(?=--)") : '' + params.braker_extra_args ? 
params.braker_extra_args.split("\\s(?=--)") : '' ].flatten().unique(false).join(' ').trim() ext.prefix = { "${meta.id}" } publishDir = [ @@ -134,4 +148,39 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } -} \ No newline at end of file +} + +if(params.liftoff_xref_annotations) { + process { + withName: LIFTOFF { + ext.args = ' ' + ext.args = [ + '-exclude_partial', + '-copies', + '-polish', + "-a $params.liftoff_coverage", + "-s $params.liftoff_identity" + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/liftoff/${meta.id}" }, + mode: "copy", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: GFFREAD { + ext.args = '--no-pseudo --keep-genes' + } + } +} + +process { + withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { + publishDir = [ + path: params.outdir, + pattern: "software_versions.yml", + mode: "copy", + enabled: true + ] + } +} diff --git a/conf/reporting_defaults.config b/conf/reporting_defaults.config index c85d378..178522d 100644 --- a/conf/reporting_defaults.config +++ b/conf/reporting_defaults.config @@ -1,5 +1,3 @@ -// Source: https://github.com/nf-core/rnaseq -// License: https://github.com/nf-core/rnaseq/blob/master/LICENSE def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -12,4 +10,4 @@ report { trace { enabled = true file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" -} \ No newline at end of file +} diff --git a/conf/test_params.json b/conf/test_params.json new file mode 100644 index 0000000..0dc25f8 --- /dev/null +++ b/conf/test_params.json @@ -0,0 +1,28 @@ +{ + "target_assemblies": [ + ["red5_v2p1", "pangene-test/target/red5_v2p1_chr1_1200k.fasta.gz"], + ["donghong", "pangene-test/target/donghong_chr1_600k.fsa.gz"] + ], + + "te_libraries": [["donghong", "pangene-test/te_lib/donghong.TElib.fa.gz"]], + + "samplesheet": "pangene-test/samplesheet/samplesheet.csv", + + 
"remove_ribo_rna": true, + "ribo_database_manifest": "assets/rrna-db-test.txt", + + "external_protein_fastas": [ + "pangene-test/ext_prot/RU01_20221115150135_chr1_600k.pep.fasta.gz", + "pangene-test/ext_prot/RU01_20221115150135_chr2_600k.pep.fasta.gz" + ], + + "braker_extra_args": "--testMode --species=arabidopsis --useexisting", + + "liftoff_xref_annotations": [ + ["pangene-test/liftoff/Russell_V2a_chr1_600k.fsa.gz", "pangene-test/liftoff/Russell_V2a_chr1_600k.gff3.gz"], + ["pangene-test/liftoff/TAIR10_chr1_600k.fas.gz", "pangene-test/liftoff/TAIR10_chr1_600k.gff3.gz"] + ], + + "max_cpus": 2, + "max_memory": "3.GB" +} diff --git a/main.nf b/main.nf index c8a54e2..9ed32f7 100755 --- a/main.nf +++ b/main.nf @@ -2,8 +2,12 @@ nextflow.enable.dsl=2 -include { PAN_GENE } from './workflows/pan_gene.nf' +include { PANGENE } from './workflows/pangene.nf' workflow { - PAN_GENE() -} \ No newline at end of file + PFR_PANGENE() +} + +workflow PFR_PANGENE { + PANGENE() +} diff --git a/modules.json b/modules.json new file mode 100644 index 0000000..4e8f0a9 --- /dev/null +++ b/modules.json @@ -0,0 +1,152 @@ +{ + "name": "PlantandFoodResearch/pangene", + "homePage": "https://github.com/PlantandFoodResearch/pangene", + "repos": { + "git@github.com:PlantandFoodResearch/nxf-modules.git": { + "modules": { + "pfr": { + "custom/restoregffids": { + "branch": "main", + "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f", + "installed_by": ["fasta_edta_lai", "modules"] + }, + "custom/shortenfastaids": { + "branch": "main", + "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c", + "installed_by": ["fasta_edta_lai", "modules"] + }, + "edta/edta": { + "branch": "main", + "git_sha": "35468dbb1f35eb17a43d7e05544601c7c3f8cd90", + "installed_by": ["fasta_edta_lai", "modules"] + }, + "lai": { + "branch": "main", + "git_sha": "7e6e3cb41362a045c6bb6065903efa0eba246e87", + "installed_by": ["fasta_edta_lai"] + }, + "liftoff": { + "branch": "main", + "git_sha": 
"444b35f4e6285115f84d2bfce49fc0e6d8a2754e", + "installed_by": ["modules"] + }, + "repeatmodeler/builddatabase": { + "branch": "main", + "git_sha": "9da0567f685b2772f65290f2bd6d6347671c8310", + "installed_by": ["modules"] + }, + "repeatmodeler/repeatmodeler": { + "branch": "main", + "git_sha": "9da0567f685b2772f65290f2bd6d6347671c8310", + "installed_by": ["modules"] + } + } + }, + "subworkflows": { + "pfr": { + "fasta_edta_lai": { + "branch": "main", + "git_sha": "5ae026a98da1331433fa4cf5b667c9abdf43e395", + "installed_by": ["subworkflows"] + } + } + } + }, + "git@github.com:kherronism/nf-modules.git": { + "modules": { + "kherronism": { + "braker3": { + "branch": "dev", + "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6", + "installed_by": ["modules"] + }, + "repeatmasker": { + "branch": "dev", + "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb", + "installed_by": ["modules"] + } + } + } + }, + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "cat/cat": { + "branch": "master", + "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2", + "installed_by": ["modules"] + }, + "cat/fastq": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", + "installed_by": ["modules"] + }, + "fastavalidator": { + "branch": "master", + "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "d086322563bdbb08c94bf15a7db58a39ccdb1520", + "installed_by": ["fastq_fastqc_umitools_fastp"] + }, + "fastqc": { + "branch": "master", + "git_sha": "617777a807a1770f73deb38c80004bac06807eef", + "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] + }, + "gffread": { + "branch": "master", + "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8", + "installed_by": ["modules"] + }, + "gunzip": { + "branch": 
"master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "samtools/cat": { + "branch": "master", + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["modules"] + }, + "sortmerna": { + "branch": "master", + "git_sha": "ce558e30784469b88a16923ca96d81899d240b42", + "installed_by": ["modules"] + }, + "star/align": { + "branch": "master", + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["modules"] + }, + "star/genomegenerate": { + "branch": "master", + "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a", + "installed_by": ["modules"] + }, + "umitools/extract": { + "branch": "master", + "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", + "installed_by": ["fastq_fastqc_umitools_fastp"] + } + } + }, + "subworkflows": { + "nf-core": { + "fastq_fastqc_umitools_fastp": { + "branch": "master", + "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee", + "installed_by": ["subworkflows"] + } + } + } + } + } +} diff --git a/modules/kherronism/CHANGELOG.md b/modules/kherronism/CHANGELOG.md deleted file mode 100644 index 5665af2..0000000 --- a/modules/kherronism/CHANGELOG.md +++ /dev/null @@ -1,21 +0,0 @@ -## Source - -- Repo: https://github.com/kherronism/rewarewaannotation/tree/1a39a83e22fe2d8665a8c6dc49772cce6579983f -- License: See LICENSE file - -## Changes - -### repeatmasker - -1. Added stub -2. Added author in meta.yml -3. Changed input "tuple val(meta), path(lib)" to "path(lib)" - -### braker3 - -1. Added stub -2. Added author in meta.yml -3. Made output hintsfile optional as it is not produced for ab-initio annotation. -4. Directed `--AUGUSTUS_CONFIG_PATH` to work folder. This avoids "species already exists" error on subsequent runs with same species. -5. Updated version extractor. -6. 
Added `containerOptions "-B $TMPDIR:$TMPDIR"` \ No newline at end of file diff --git a/modules/kherronism/LICENSE b/modules/kherronism/LICENSE deleted file mode 100644 index 4b42925..0000000 --- a/modules/kherronism/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) Katie Herron - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file diff --git a/modules/kherronism/braker3/main.nf b/modules/kherronism/braker3/main.nf index 82fa096..ae0ec81 100644 --- a/modules/kherronism/braker3/main.nf +++ b/modules/kherronism/braker3/main.nf @@ -2,12 +2,7 @@ process BRAKER3 { tag "${meta.id}" label 'process_high' - conda "bioconda::braker3=3.0.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'registry.hub.docker.com/teambraker/braker3:v.1.0.3': - 'registry.hub.docker.com/teambraker/braker3:v.1.0.3' }" - - containerOptions "-B $TMPDIR:$TMPDIR" + container "gallvp/teambraker_braker3:v1.0.6" input: tuple val(meta), path(fasta) @@ -31,51 +26,58 @@ process BRAKER3 { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" - def hints = hintsfile ? "--hints=${hintsfile}" : '' - def bam = bam ? "--bam=${bam}" : '' - def proteins = proteins ? "--prot_seq=${proteins}" : '' - def rna_dirs = rnaseq_sets_dirs ? "--rnaseq_sets_dirs=${rnaseq_sets_dirs}" : '' - def rna_ids = rnaseq_sets_ids ? "--rnaseq_sets_ids=${rnaseq_sets_ids}" : '' + def test_mode = args.contains('--testMode') // Custom flag for test data + def args_fmt = test_mode ? args.replace('--testMode', '') : args + + def rna_ids = rnaseq_sets_ids ? "--rnaseq_sets_ids=${rnaseq_sets_ids}" : '' + def rna_dirs = rnaseq_sets_dirs ? "--rnaseq_sets_dirs=${rnaseq_sets_dirs}" : '' + def bam = bam && !test_mode ? "--bam=${bam}" : '' + def proteins = proteins && !test_mode ? "--prot_seq=${proteins}" : '' + def hints = hintsfile ? "--hints=${hintsfile}" : '' + + def new_species = args.contains('--species') ? 
'' : "--species new_species" """ cp -r /usr/share/augustus/config augustus_config braker.pl \\ --genome ${fasta} \\ - --species ${prefix} \\ + ${new_species} \\ --workingdir ${prefix} \\ --AUGUSTUS_CONFIG_PATH "\$(pwd)/augustus_config" \\ --threads ${task.cpus} \\ - ${hints} \\ + ${rna_ids} \\ + ${rna_dirs} \\ ${bam} \\ ${proteins} \\ - ${rna_dirs} \\ - ${rna_ids} \\ - ${args} + ${hints} \\ + ${args_fmt} cat <<-END_VERSIONS > versions.yml "${task.process}": - braker3: \$(braker.pl --version 2>&1 | grep "version" | sed 's/braker.pl version//; s/\\s*//') + braker3: \$(braker.pl --version 2>/dev/null | sed 's/braker.pl version//') END_VERSIONS """ stub: - prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" + def rna_ids = rnaseq_sets_ids ? "--rnaseq_sets_ids=${rnaseq_sets_ids}" : '' + def touch_hints = (rna_ids || bam || proteins || hints) ? "touch ${prefix}/hintsfile.gff" : '' """ mkdir "$prefix" touch "${prefix}/braker.gtf" touch "${prefix}/braker.codingseq" touch "${prefix}/braker.aa" - touch "${prefix}/hintsfile.gff" + $touch_hints touch "${prefix}/braker.log" touch "${prefix}/what-to-cite.txt" cat <<-END_VERSIONS > versions.yml "${task.process}": - braker3: \$(braker.pl --version 2>&1 | grep "version" | sed 's/braker.pl version//; s/\\s*//') + braker3: \$(braker.pl --version 2>/dev/null | sed 's/braker.pl version//') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/kherronism/braker3/meta.yml b/modules/kherronism/braker3/meta.yml index b3506fd..ed4da40 100644 --- a/modules/kherronism/braker3/meta.yml +++ b/modules/kherronism/braker3/meta.yml @@ -1,6 +1,6 @@ name: braker3 description: | - Gene prediction in novel genomes using RNA-seq and protein homology information + Gene prediction in novel genomes using RNA-seq and protein homology information keywords: - genome - annotation @@ -8,8 +8,8 @@ keywords: tools: - braker3: description: "BRAKER3 is a pipeline for fully automated prediction of - protein coding 
gene structures using protein and RNA-seq and protein homology - information" + protein coding gene structures using protein and RNA-seq and protein homology + information" homepage: "https://github.com/Gaius-Augustus/BRAKER" documentation: "https://github.com/Gaius-Augustus/BRAKER" tool_dev_url: "https://github.com/Gaius-Augustus/BRAKER" @@ -22,11 +22,28 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - assembly: + - fasta: type: file - description: Genome assembly + description: Genome assembly fasta pattern: "*.{fasta,fa,fas,faa,fna}" - + - rnaseq_sets_ids: + type: file + description: IDs of RNA-seq data sets to be passed to --rnaseq_sets_ids + - rnaseq_sets_dirs: + type: file + description: Directories of RNA-seq data sets to be passed to --rnaseq_sets_dirs + - bam: + type: file + description: BAM file of RNA-seq data to be passed to --bam + pattern: "*.bam" + - proteins: + type: file + description: Protein evidence to be passed to --proteins + pattern: "*.{fasta,fa,fas,faa}" + - hintsfile: + type: file + description: Hintsfile to be passed to --hintsfile + pattern: "*.{gff, gtf, gff3}" output: - gtf: type: file @@ -55,4 +72,3 @@ output: authors: - "@kherronism" - - "@gallvp" diff --git a/modules/kherronism/repeatmasker/meta.yml b/modules/kherronism/repeatmasker/meta.yml index eb15048..0cab608 100644 --- a/modules/kherronism/repeatmasker/meta.yml +++ b/modules/kherronism/repeatmasker/meta.yml @@ -1,6 +1,6 @@ name: repeatmasker description: | - Screening DNA sequences for interspersed repeats and low complexity DNA sequences. + Screening DNA sequences for interspersed repeats and low complexity DNA sequences keywords: - genome @@ -9,8 +9,9 @@ keywords: tools: - repeatmasker: - description: "RepeatMasker is a program that screens DNA sequences for interspersed - repeats and low complexity DNA sequences." 
+ description: | + RepeatMasker is a program that screens DNA sequences for interspersed + repeats and low complexity DNA sequences homepage: "https://www.repeatmasker.org/" documentation: "https://www.repeatmasker.org/webrepeatmaskerhelp.html" tool_dev_url: "https://github.com/rmhubley/RepeatMasker" @@ -44,4 +45,3 @@ output: authors: - "@kherronism" - - "@gallvp" diff --git a/modules/local/edta/edta/main.nf b/modules/local/edta/edta/main.nf deleted file mode 100644 index 99b6811..0000000 --- a/modules/local/edta/edta/main.nf +++ /dev/null @@ -1,74 +0,0 @@ -nextflow.enable.dsl=2 - -// https://github.com/Plant-Food-Research-Open/assembly_qc -// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE -process EDTA { - tag "$meta.id" - label "process_high" - label "process_week_long" - - container 'quay.io/biocontainers/edta:2.1.0--hdfd78af_1' - containerOptions "-B $TMPDIR:$TMPDIR" - - input: - tuple val(meta), path(fasta_file) - - output: - tuple val(meta), path('*.EDTA.TElib.fa'), emit: te_lib_fasta - tuple val(meta), path('*.EDTA.intact.gff3'), emit: intact_gff3 - tuple val(meta), path('*.EDTA.pass.list'), emit: pass_list - tuple val(meta), path('*.EDTA.out'), emit: out_file - tuple val(meta), path('*.EDTA.TEanno.gff3'), emit: te_anno_gff3 - path "versions.yml", emit: versions - - script: - def args = task.ext.args ?: '' - def modFileName = "${fasta_file}.mod" - """ - EDTA.pl \\ - --genome $fasta_file \\ - --threads $task.cpus \\ - $args - - if [ -f "${modFileName}.EDTA.raw/LTR/${modFileName}.pass.list" ]; then - cat "${modFileName}.EDTA.raw/LTR/${modFileName}.pass.list" \\ - > "${modFileName}.EDTA.pass.list" - else - echo "EDTA PASS LIST IS EMPTY" \\ - > "${modFileName}.EDTA.pass.list" - fi - - if [ -f "${modFileName}.EDTA.anno/${modFileName}.out" ]; then - cat "${modFileName}.EDTA.anno/${modFileName}.out" \\ - > "${modFileName}.EDTA.out" - else - echo "EDTA DID NOT PRODUCE AN OUT FILE" \\ - > "${modFileName}.EDTA.out" - fi - - if [ ! 
-f "${modFileName}.EDTA.TEanno.gff3" ]; then - echo "##EDTA DID NOT PRODUCE A TEANNO GFF3" \\ - > "${modFileName}.EDTA.TEanno.gff3" - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}') - END_VERSIONS - """ - - stub: - def modFileName = "${fasta_file}.mod" - """ - touch "${modFileName}.EDTA.TElib.fa" - touch "${modFileName}.EDTA.intact.gff3" - touch "${modFileName}.EDTA.pass.list" - touch "${modFileName}.EDTA.out" - touch "${modFileName}.EDTA.TEanno.gff3" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}') - END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/local/edta/restore_edta_ids/main.nf b/modules/local/edta/restore_edta_ids/main.nf deleted file mode 100644 index 606848c..0000000 --- a/modules/local/edta/restore_edta_ids/main.nf +++ /dev/null @@ -1,66 +0,0 @@ -nextflow.enable.dsl=2 - -// https://github.com/Plant-Food-Research-Open/assembly_qc -// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE -process RESTORE_EDTA_IDS { - tag "$meta.id" - label "process_single" - - container "docker://gallvp/python3npkgs:v0.4" - - input: - tuple val(meta), path(te_lib_fa) - path(intact_gff3) - path(pass_list) - path(out_file) - path(te_anno_gff3) - path(renamed_ids_tsv) - - output: - tuple val(meta), path("${meta.id}.EDTA.TElib.fa"), emit: te_lib_fasta - tuple val(meta), path("${meta.id}.EDTA.intact.gff3"), emit: intact_gff3 - tuple val(meta), path("${meta.id}.renamed.ids.EDTA.pass.list"), emit: pass_list - tuple val(meta), path("${meta.id}.renamed.ids.EDTA.out"), emit: out_file - tuple val(meta), path("${meta.id}.EDTA.TEanno.gff3"), emit: te_anno_gff3 - tuple val(meta), path("${meta.id}.renamed.ids.tsv"), emit: renamed_ids_tsv - path "versions.yml", emit: versions - - script: - def VERSION = "f1b7bce" // WARN: Version information not provided by tool on CLI. 
Please update version string below when bumping container versions. - """ - cat $pass_list > "${meta.id}.renamed.ids.EDTA.pass.list" - cat $out_file > "${meta.id}.renamed.ids.EDTA.out" - cat $te_lib_fa > "${meta.id}.EDTA.TElib.fa" - cat $renamed_ids_tsv > "${meta.id}.renamed.ids.tsv" - - renamed_ids_head=\$(head -n 1 "$renamed_ids_tsv") - - if [[ \$renamed_ids_head == "IDs have acceptable length and character. No change required." ]]; then - cat $te_anno_gff3 > "${meta.id}.EDTA.TEanno.gff3" - cat $intact_gff3 > "${meta.id}.EDTA.intact.gff3" - else - reverse_edta_naming_f1b7bce.py "$renamed_ids_tsv" "$te_anno_gff3" "$intact_gff3" "$meta" - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - reverse_edta_naming: $VERSION - END_VERSIONS - """ - - stub: - def VERSION = "f1b7bce" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - """ - touch "${meta.id}.EDTA.TElib.fa" - touch "${meta.id}.EDTA.intact.gff3" - touch "${meta.id}.renamed.ids.EDTA.pass.list" - touch "${meta.id}.renamed.ids.EDTA.out" - touch "${meta.id}.EDTA.TEanno.gff3" - touch "${meta.id}.renamed.ids.tsv" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - reverse_edta_naming: $VERSION - END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming_f1b7bce.py b/modules/local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming_f1b7bce.py deleted file mode 100755 index c047100..0000000 --- a/modules/local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming_f1b7bce.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python - -import sys - -renamed_ids_tsv = sys.argv[1] -te_anno_gff3 = sys.argv[2] -intact_gff3 = sys.argv[3] -output_prefix = sys.argv[4] - - -def create_name_mapping_from_file(file_path): - dictionary = {} - - with open(file_path, "r") as tsv_file: - for line in tsv_file: - columns = line.strip().split("\t") - if 
len(columns) != 2: - raise ValueError(f"{file_path} should be a two column TSV file") - - orig_id, new_id = columns[0], columns[1] - dictionary[new_id] = orig_id - - return dictionary - - -def reverse_rename_gff3_file(new_to_orig_ids, file_path, output_file_name): - with open(file_path, "r") as input_gff3_file: - input_lines = input_gff3_file.readlines() - - with open(output_file_name, "w") as output_gff_file: - for line in input_lines: - if line.startswith("##"): - output_gff_file.write(line) - continue - - new_id = line.split("\t")[0] - orig_id = new_to_orig_ids[new_id] - output_gff_file.write(line.replace(new_id, orig_id)) - - -if __name__ == "__main__": - new_to_orig_ids = create_name_mapping_from_file(renamed_ids_tsv) - reverse_rename_gff3_file( - new_to_orig_ids, te_anno_gff3, f"{output_prefix}.EDTA.TEanno.gff3" - ) - reverse_rename_gff3_file( - new_to_orig_ids, intact_gff3, f"{output_prefix}.EDTA.intact.gff3" - ) diff --git a/modules/local/edta/shorten_edta_ids/main.nf b/modules/local/edta/shorten_edta_ids/main.nf deleted file mode 100644 index 829667b..0000000 --- a/modules/local/edta/shorten_edta_ids/main.nf +++ /dev/null @@ -1,47 +0,0 @@ -nextflow.enable.dsl=2 - -// https://github.com/Plant-Food-Research-Open/assembly_qc -// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE -process SHORTEN_EDTA_IDS { - tag "$meta.id" - label "process_single" - - container "docker://gallvp/python3npkgs:v0.4" - - input: - tuple val(meta), path(fasta_file) - - output: - tuple val(meta), path("*.renamed.ids.fa"), emit: renamed_ids_fasta - tuple val(meta), path("*.renamed.ids.tsv"), emit: renamed_ids_tsv - path "versions.yml", emit: versions - - script: - def VERSION = "c97537f" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. 
- """ - FILE="$fasta_file" - output_prefix="\${FILE%.*}" - - shorten_fasta_ids_c97537f.py "$fasta_file" "\$output_prefix" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - shorten_fasta_ids: $VERSION - END_VERSIONS - """ - - stub: - def VERSION = "c97537f" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - """ - FILE="$fasta_file" - output_prefix="\${FILE%.*}" - - touch "\${output_prefix}.renamed.ids.fa" - touch "\${output_prefix}.renamed.ids.tsv" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - shorten_fasta_ids: $VERSION - END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/local/fasta_validate/main.nf b/modules/local/fasta_validate/main.nf deleted file mode 100644 index 7f8370c..0000000 --- a/modules/local/fasta_validate/main.nf +++ /dev/null @@ -1,46 +0,0 @@ -nextflow.enable.dsl=2 - -// https://github.com/Plant-Food-Research-Open/assembly_qc -// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE -process FASTA_VALIDATE { - tag "$meta.id" - label "process_single" - - container "docker://gallvp/fasta_validator:a6a2ec1_ps" - - input: - tuple val(meta), path(fasta_file) - - output: - tuple val(meta), path("$validFasta"), emit: valid_fasta - path "versions.yml", emit: versions - - script: - validFasta = (fasta_file.toString() - ~/\.\w+$/) + ".validated.fasta" - def VERSION = "a6a2ec1_ps" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - """ - fasta_validate -v $fasta_file >/dev/null - - # If invalid, the above command will fail and - # the NXF error startegy will kick in. 
- - cat $fasta_file > $validFasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fasta_validate: $VERSION - END_VERSIONS - """ - - stub: - validFasta = (fasta_file.toString() - ~/\.\w+$/) + ".validated.fasta" - def VERSION = "a6a2ec1_ps" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - """ - touch $validFasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fasta_validate: $VERSION - END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/local/samplesheet_check/main.nf b/modules/local/samplesheet_check/main.nf index adb8a92..4fb60f8 100644 --- a/modules/local/samplesheet_check/main.nf +++ b/modules/local/samplesheet_check/main.nf @@ -1,10 +1,9 @@ -nextflow.enable.dsl=2 - +// Source: // https://github.com/nf-core/rnaseq // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE // // Changes: -// Added channel permissible_target_assemblies +// Added channel permissible_target_assemblies process SAMPLESHEET_CHECK { tag "$samplesheet" @@ -38,4 +37,4 @@ process SAMPLESHEET_CHECK { python: \$(python --version | sed 's/Python //g') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/local/validate_params/main.nf b/modules/local/validate_params.nf similarity index 63% rename from modules/local/validate_params/main.nf rename to modules/local/validate_params.nf index f40d2ac..460ce80 100644 --- a/modules/local/validate_params/main.nf +++ b/modules/local/validate_params.nf @@ -1,12 +1,20 @@ -nextflow.enable.dsl=2 - -// https://github.com/Plant-Food-Research-Open/assembly_qc -// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE def validateParams(params) { validateFastaTags(params) - validateTETags(params) + if (!params['repeat_annotator']) { + error "Error: repeat_annotator must be either 'repeatmodeler' or 'edta'" + } + + if ( !(params['repeat_annotator'] in ['repeatmodeler', 'edta']) ) { + error 
"Error: repeat_annotator must be either 'repeatmodeler' or 'edta'" + } + + validateTETags(params) validateTEFastaCorrespondence(params) + + validateRiboDBManifest(params) + + validateLiftoffXrefs(params) } def validateFastaTags(params) { @@ -30,6 +38,11 @@ def validateFastaTags(params) { } def validateTETags(params) { + + if(!params["te_libraries"]) { + return + } + def listOfTETuples = params["te_libraries"] if (listOfTETuples.isEmpty()) { @@ -50,7 +63,11 @@ def validateTETags(params) { } def validateTEFastaCorrespondence(params) { - + + if(!params["te_libraries"]) { + return + } + def listOfTETuples = params["te_libraries"] def listOfFastaTuples = params["target_assemblies"] @@ -64,6 +81,24 @@ def validateTEFastaCorrespondence(params) { } } +def validateRiboDBManifest(params) { + if (params.remove_ribo_rna) { + file_ribo_db = file(params.ribo_database_manifest, checkIfExists: true) + + if (file_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${file_ribo_db.getName()}!"} + } +} + +def validateLiftoffXrefs(params) { + if(!params["liftoff_xref_annotations"]) { + return + } + + if(isNotListOfLists(params["liftoff_xref_annotations"]), 2) { + error "Error: liftoff_xref_annotations must be a list of sublists, with each sublist containing 2 elements" + } +} + def isNotListOfLists(thisOne, subListSize) { return (!(thisOne instanceof List) || thisOne.isEmpty() || thisOne.any { !(it instanceof List) || it.size() != subListSize }) -} \ No newline at end of file +} diff --git a/modules/nf-core/CHANGELOG.md b/modules/nf-core/CHANGELOG.md deleted file mode 100644 index 1b886a1..0000000 --- a/modules/nf-core/CHANGELOG.md +++ /dev/null @@ -1,26 +0,0 @@ -## Source - -- Repo: https://github.com/nf-core/modules/tree/3fbcafe2543dabcc7b2be0f3b24507002b3e4b0d -- License: See LICENSE file - -## Changes - -### trinity - -1. Added stub -2. Added author in meta.yml - -### fastp - -1. Added stub -2. Added author in meta.yml - -### star/genomegenerate - -1. 
Added star_ignore_sjdbgtf -2. Added author in meta.yml - -### sortmerna - -1. Added stub -2. Added author in meta.yml \ No newline at end of file diff --git a/modules/nf-core/LICENSE b/modules/nf-core/LICENSE deleted file mode 100644 index d2e2384..0000000 --- a/modules/nf-core/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) Philip Ewels - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
\ No newline at end of file diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 0000000..17a04ef --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,7 @@ +name: cat_cat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf index 9f06221..970ab76 100644 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ -2,7 +2,7 @@ process CAT_CAT { tag "$meta.id" label 'process_low' - conda "conda-forge::pigz=2.3.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : 'biocontainers/pigz:2.3.4' }" @@ -35,6 +35,10 @@ process CAT_CAT { in_zip = file_list[0].endsWith('.gz') command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } """ $command1 \\ $args \\ @@ -49,8 +53,12 @@ process CAT_CAT { """ stub: - def file_list = files_in.collect { it.toString() } - prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." 
+ } """ touch $prefix diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml index 8acc0bf..00a8db0 100644 --- a/modules/nf-core/cat/cat/meta.yml +++ b/modules/nf-core/cat/cat/meta.yml @@ -7,9 +7,7 @@ keywords: tools: - cat: description: Just concatenation - documentation: https://man7.org/linux/man-pages/man1/cat.1.html - licence: ["GPL-3.0-or-later"] input: - meta: @@ -21,7 +19,6 @@ input: type: file description: List of compressed / uncompressed files pattern: "*" - output: - versions: type: file @@ -31,7 +28,9 @@ output: type: file description: Concatenated file. Will be gzipped if file_out ends with ".gz" pattern: "${file_out}" - authors: - "@erikrikarddaniel" - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 0000000..ed5a4f1 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,179 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_name_conflict") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") } + ) + } + } + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + 
file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, + { assert 
snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} + ) + } + } +} + diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 0000000..423571b --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,121 @@ +{ + "test_cat_unzipped_zipped_size": { + "content": [ + 375 + ], + "timestamp": "2023-10-16T14:33:08.049445686" + }, + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } 
+ ], + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped_lines": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ] + ], + "timestamp": "2023-10-16T14:32:33.629048645" + }, + "test_cat_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:08.038830506" + }, + "test_cat_one_file_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + 
"GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:21.39642399" + }, + "test_cat_zipped_zipped_size": { + "content": [ + 78 + ], + "timestamp": "2023-10-16T14:32:33.641869244" + }, + "test_cat_one_file_unzipped_zipped_size": { + "content": [ + 374 + ], + "timestamp": "2023-10-16T14:33:21.4094373" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 0000000..ec26b0f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 0000000..fbc7978 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 0000000..37b578f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 0000000..bff93ad --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,7 @@ +name: cat_fastq +channels: + - conda-forge + - bioconda + - defaults 
+dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index 5021e6f..3d96378 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -2,7 +2,7 @@ process CAT_FASTQ { tag "$meta.id" label 'process_single' - conda "conda-forge::sed=4.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml index 8a39e30..db4ac3c 100644 --- a/modules/nf-core/cat/fastq/meta.yml +++ b/modules/nf-core/cat/fastq/meta.yml @@ -34,7 +34,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@joseespinosa" - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 0000000..f5f9418 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,143 @@ +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + params { + outdir = 
"$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + 
test("test_cat_fastq_single_end_single_file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file mode 100644 index 0000000..ec2342e --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,78 @@ +{ + "test_cat_fastq_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d" + ] + ] + ], + "timestamp": "2023-10-17T23:19:12.990284837" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66" + ] + ] + ], + "timestamp": "2023-10-17T23:19:31.554568147" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af" + ] + ] + ], + "timestamp": "2023-10-17T23:19:49.629360033" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66", + "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e" + ] + ] + ] + ], + "timestamp": "2023-10-17T23:19:40.711617539" + }, + "test_cat_fastq_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d", + "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda" + ] + ] + ] 
+ ], + "timestamp": "2023-10-18T07:53:20.923560211" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml new file mode 100644 index 0000000..6ac4361 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/tags.yml @@ -0,0 +1,2 @@ +cat/fastq: + - modules/nf-core/cat/fastq/** diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 0000000..9b3272b --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf new file mode 100644 index 0000000..f218761 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -0,0 +1,24 @@ +process CUSTOM_DUMPSOFTWAREVERSIONS { + label 'process_single' + + // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" + + input: + path versions + + output: + path "software_versions.yml" , emit: yml + path "software_versions_mqc.yml", emit: mqc_yml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + template 'dumpsoftwareversions.py' +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml new file mode 100644 index 0000000..5f15a5f --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -0,0 +1,37 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: custom_dumpsoftwareversions +description: Custom module used to dump software versions within the nf-core pipeline template +keywords: + - custom + - dump + - version +tools: + - custom: + description: Custom module used to dump software versions within the nf-core pipeline template + homepage: https://github.com/nf-core/tools + documentation: https://github.com/nf-core/tools + licence: ["MIT"] +input: + - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" +output: + - yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" + - mqc_yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py new file mode 100755 index 0000000..da03340 --- /dev/null +++ 
b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python + + +"""Provide functions to merge multiple versions.yml files.""" + + +import yaml +import platform +from textwrap import dedent + + +def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" + html = [ + dedent( + """\\ + + + + + + + + + + """ + ) + ] + for process, tmp_versions in sorted(versions.items()): + html.append("") + for i, (tool, version) in enumerate(sorted(tmp_versions.items())): + html.append( + dedent( + f"""\\ + + + + + + """ + ) + ) + html.append("") + html.append("
Process Name Software Version
{process if (i == 0) else ''}{tool}{version}
") + return "\\n".join(html) + + +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 0000000..b1e1630 --- /dev/null +++ 
b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 0000000..5f59a93 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] + ], + "timestamp": "2024-01-09T23:01:18.710682" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 0000000..405aa24 --- /dev/null +++ 
b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastavalidator/environment.yml b/modules/nf-core/fastavalidator/environment.yml new file mode 100644 index 0000000..70f346e --- /dev/null +++ b/modules/nf-core/fastavalidator/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "fastavalidator" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::py_fasta_validator=0.6" diff --git a/modules/nf-core/fastavalidator/main.nf b/modules/nf-core/fastavalidator/main.nf new file mode 100644 index 0000000..ac5470f --- /dev/null +++ b/modules/nf-core/fastavalidator/main.nf @@ -0,0 +1,62 @@ +process FASTAVALIDATOR { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/py_fasta_validator:0.6--py37h595c7a6_0': + 'biocontainers/py_fasta_validator:0.6--py37h595c7a6_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.success.log') , emit: success_log , optional: true + tuple val(meta), path('*.error.log') , emit: error_log , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + py_fasta_validator \\ + -f $fasta \\ + 2> "${prefix}.error.log" \\ + || echo "Errors from fasta_validate printed to ${prefix}.error.log" + + if [ \$(cat "${prefix}.error.log" | wc -l) -gt 0 ]; then + echo "Validation failed..." + + cat \\ + "${prefix}.error.log" + else + echo "Validation successful..." + + mv \\ + "${prefix}.error.log" \\ + fasta_validate.stderr + + echo "Validation successful..." 
\\ + > "${prefix}.success.log" + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + py_fasta_validator: \$(py_fasta_validator -v | sed 's/.* version //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "Validation successful..." \\ + > "${prefix}.success.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + py_fasta_validator: \$(py_fasta_validator -v | sed 's/.* version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastavalidator/meta.yml b/modules/nf-core/fastavalidator/meta.yml new file mode 100644 index 0000000..c5c4371 --- /dev/null +++ b/modules/nf-core/fastavalidator/meta.yml @@ -0,0 +1,53 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "fastavalidator" +description: | + "Python C-extension for a simple validator for fasta files. The module emits the validated file or an + error log upon validation failure." +keywords: + - fasta + - validation + - genome +tools: + - fasta_validate: + description: | + "Python C-extension for a simple C code to validate a fasta file. It only checks a few things, + and by default only sets its response via the return code, + so you will need to check that!" + homepage: "https://github.com/linsalrob/py_fasta_validator" + documentation: "https://github.com/linsalrob/py_fasta_validator" + tool_dev_url: "https://github.com/linsalrob/py_fasta_validator" + doi: "10.5281/zenodo.5002710" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.fasta" +output: + - meta: + type: map + description: | + Groovy Map containing file information + e.g. 
[ id:'test' ] + - success_log: + type: file + description: Log file for successful validation + pattern: "*.success.log" + - error_log: + type: file + description: Log file for failed validation + pattern: "*.error.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@gallvp" +maintainers: + - "@gallvp" diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test b/modules/nf-core/fastavalidator/tests/main.nf.test new file mode 100644 index 0000000..bb8c22c --- /dev/null +++ b/modules/nf-core/fastavalidator/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process FASTAVALIDATOR" + script "../main.nf" + process "FASTAVALIDATOR" + + tag "modules" + tag "modules_nfcore" + tag "fastavalidator" + + test("sarscov2-fasta-valid") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.success_log != null }, + { assert process.out.error_log == [] }, + { assert path(process.out.success_log.get(0).get(1)).getText().contains("Validation successful...") } + ) + } + + } + + test("sarscov2-gff3-invalid") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.success_log == [] }, + { assert process.out.error_log != null }, + { assert path(process.out.error_log.get(0).get(1)).getText().contains("genome.gff3 does not start with a >") } + ) + } + + } +} diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test.snap b/modules/nf-core/fastavalidator/tests/main.nf.test.snap new 
file mode 100644 index 0000000..382dee7 --- /dev/null +++ b/modules/nf-core/fastavalidator/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "sarscov2-fasta-valid": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.success.log:md5,b0b859eda1db5cd43915846e00ebc22c" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ], + "error_log": [ + + ], + "success_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.success.log:md5,b0b859eda1db5cd43915846e00ebc22c" + ] + ], + "versions": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ] + } + ], + "timestamp": "2023-11-28T11:23:25.106872" + }, + "sarscov2-gff3-invalid": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.error.log:md5,531d520c0e7767176f743f197f1f87b3" + ] + ], + "2": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ], + "error_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.error.log:md5,531d520c0e7767176f743f197f1f87b3" + ] + ], + "success_log": [ + + ], + "versions": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ] + } + ], + "timestamp": "2023-11-28T11:23:29.40324" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastavalidator/tests/tags.yml b/modules/nf-core/fastavalidator/tests/tags.yml new file mode 100644 index 0000000..c3c7757 --- /dev/null +++ b/modules/nf-core/fastavalidator/tests/tags.yml @@ -0,0 +1,2 @@ +fastavalidator: + - "modules/nf-core/fastavalidator/**" diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 0000000..70389e6 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,7 @@ +name: fastp +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index ee38e1d..2a3b679 100644 --- 
a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,7 +2,7 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda "bioconda::fastp=0.23.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : 'biocontainers/fastp:0.23.4--h5f740d0_0' }" @@ -45,7 +45,7 @@ process FASTP { $adapter_list \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ | gzip -c > ${prefix}.fastp.fastq.gz cat <<-END_VERSIONS > versions.yml @@ -66,7 +66,7 @@ process FASTP { $adapter_list \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log + 2> >(tee ${prefix}.fastp.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -91,7 +91,7 @@ process FASTP { --thread $task.cpus \\ --detect_adapter_for_pe \\ $args \\ - 2> ${prefix}.fastp.log + 2> >(tee ${prefix}.fastp.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -101,13 +101,17 @@ process FASTP { } stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? 
"touch ${prefix}.merged.fastq.gz" : "" """ - touch "${prefix}.fastp.fastq.gz" - touch "${prefix}.json" - touch "${prefix}.html" - touch "${prefix}.log" - + touch $touch_reads + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + $touch_merged + cat <<-END_VERSIONS > versions.yml "${task.process}": fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml index 1c34ff9..c22a16a 100644 --- a/modules/nf-core/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -33,7 +33,6 @@ input: - save_merged: type: boolean description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` - output: - meta: type: map @@ -71,4 +70,6 @@ output: authors: - "@drpatelh" - "@kevinmenden" - - "@gallvp" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 0000000..17dce8a --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,726 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:true ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:") - assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" + assertAll ( + { assert process.success }, + + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: 
true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + { assert path(process.out.html[0][1][2]).text.contains("") }, + { assert path(process.out.html[0][1][3]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = [ + [ id:'mysample', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: 
true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } } + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match() } + ) + } } } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 0000000..5ef5afb --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,20 @@ +{ + "sarscov2 single-end [fastq] - stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-12-29T02:48:05.126117287" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-12-29T02:46:49.507942667" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 0000000..7834294 --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml new file mode 100644 index 0000000..5398f71 --- /dev/null +++ b/modules/nf-core/gffread/environment.yml @@ -0,0 +1,7 @@ +name: gffread +channels: + - conda-forge + - bioconda + - defaults 
+dependencies: + - bioconda::gffread=0.12.1 diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf new file mode 100644 index 0000000..d8a473e --- /dev/null +++ b/modules/nf-core/gffread/main.nf @@ -0,0 +1,35 @@ +process GFFREAD { + tag "$gff" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : + 'biocontainers/gffread:0.12.1--h8b12597_0' }" + + input: + path gff + + output: + path "*.gtf" , emit: gtf , optional: true + path "*.gff3" , emit: gffread_gff , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${gff.baseName}" + def extension = args.contains("-T") ? 'gtf' : 'gffread.gff3' + """ + gffread \\ + $gff \\ + $args \\ + -o ${prefix}.${extension} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml new file mode 100644 index 0000000..27ac310 --- /dev/null +++ b/modules/nf-core/gffread/meta.yml @@ -0,0 +1,36 @@ +name: gffread +description: Validate, filter, convert and perform various other operations on GFF files +keywords: + - gff + - conversion + - validation +tools: + - gffread: + description: GFF/GTF utility providing format conversions, region filtering, FASTA sequence extraction and more. + homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + tool_dev_url: https://github.com/gpertea/gffread + doi: 10.12688/f1000research.23297.1 + licence: ["MIT"] +input: + - gff: + type: file + description: A reference file in either the GFF3, GFF2 or GTF format. 
+ pattern: "*.{gff, gtf}" +output: + - gtf: + type: file + description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present + pattern: "*.{gtf}" + - gffread_gff: + type: file + description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent + pattern: "*.{gff3}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@emiller88" +maintainers: + - "@emiller88" diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test new file mode 100644 index 0000000..3c064b3 --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process GFFREAD" + script "../main.nf" + process "GFFREAD" + + tag "gffread" + tag "modules_nfcore" + tag "modules" + + test("sarscov2-gff3-gtf") { + + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf != null }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + test("sarscov2-gff3-gff3") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_gff != null }, + ) + } + + } + +} diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap new file mode 100644 index 0000000..1f1342e --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "sarscov2-gff3-gtf": { + "content": [ 
+ { + "0": [ + "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ], + "gffread_gff": [ + + ], + "gtf": [ + "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" + ], + "versions": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ] + } + ], + "timestamp": "2023-11-29T15:39:30.006985" + }, + "sarscov2-gff3-gff3": { + "content": [ + { + "0": [ + + ], + "1": [ + "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" + ], + "2": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ], + "gffread_gff": [ + "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ] + } + ], + "timestamp": "2023-11-29T15:39:34.636061" + } +} \ No newline at end of file diff --git a/modules/nf-core/gffread/tests/nextflow.config b/modules/nf-core/gffread/tests/nextflow.config new file mode 100644 index 0000000..74b2509 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-T' + } +} diff --git a/modules/nf-core/gffread/tests/tags.yml b/modules/nf-core/gffread/tests/tags.yml new file mode 100644 index 0000000..0557606 --- /dev/null +++ b/modules/nf-core/gffread/tests/tags.yml @@ -0,0 +1,2 @@ +gffread: + - modules/nf-core/gffread/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 0000000..25910b3 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +name: gunzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index 73bf08c..468a6f2 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -2,7 +2,7 @@ process GUNZIP { tag "$archive" label 'process_single' - conda "conda-forge::sed=4.7" + 
conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 4cdcdf4..231034f 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -33,3 +33,7 @@ authors: - "@joseespinosa" - "@drpatelh" - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 0000000..d031792 --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [], + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 0000000..720fd9f --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "timestamp": "2023-10-17T15:35:37.690477896" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 0000000..fd3f691 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/samtools/cat/environment.yml b/modules/nf-core/samtools/cat/environment.yml new file mode 100644 index 0000000..0455a7d --- /dev/null +++ b/modules/nf-core/samtools/cat/environment.yml @@ -0,0 +1,7 @@ +name: samtools_cat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 diff --git a/modules/nf-core/samtools/cat/main.nf b/modules/nf-core/samtools/cat/main.nf index 22a63e2..b3b2508 100644 --- a/modules/nf-core/samtools/cat/main.nf +++ b/modules/nf-core/samtools/cat/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_CAT { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" input: tuple val(meta), path(input_files, stageAs: "?/*") diff --git a/modules/nf-core/samtools/cat/meta.yml b/modules/nf-core/samtools/cat/meta.yml index 42632e7..3541e0c 100644 --- a/modules/nf-core/samtools/cat/meta.yml +++ b/modules/nf-core/samtools/cat/meta.yml @@ -47,3 +47,5 @@ output: pattern: "versions.yml" authors: - "@matthdsm" +maintainers: + - "@matthdsm" diff --git a/modules/nf-core/samtools/cat/tests/main.nf.test b/modules/nf-core/samtools/cat/tests/main.nf.test new file mode 100644 index 0000000..49c633f --- /dev/null +++ b/modules/nf-core/samtools/cat/tests/main.nf.test @@ -0,0 +1,72 @@ +nextflow_process { + + name "Test Process SAMTOOLS_CAT" + script "../main.nf" + process "SAMTOOLS_CAT" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/cat" + + test("sarscov2 - [bam1, bam2]") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_unaligned_bam'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.cram, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [bam1, bam2] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_unaligned_bam'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot( + file(process.out.bam[0][1]).name, + process.out.cram, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/samtools/cat/tests/main.nf.test.snap b/modules/nf-core/samtools/cat/tests/main.nf.test.snap new file mode 100644 index 0000000..298e25d --- /dev/null +++ b/modules/nf-core/samtools/cat/tests/main.nf.test.snap @@ -0,0 +1,26 @@ +{ + "sarscov2 - [bam1, bam2]": { + "content": [ + "test.bam", + [ + + ], + [ + "versions.yml:md5,f10a4f6b2e0272bef2ceb4ca826a15a1" + ] + ], + "timestamp": "2023-12-04T14:00:18.264348819" + }, + "sarscov2 - [bam1, bam2] - stub": { + "content": [ + "test.bam", + [ + + ], + [ + "versions.yml:md5,f10a4f6b2e0272bef2ceb4ca826a15a1" + ] + ], + "timestamp": "2023-12-04T14:03:17.714482742" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/cat/tests/tags.yml b/modules/nf-core/samtools/cat/tests/tags.yml new file mode 100644 index 0000000..9760557 --- /dev/null +++ b/modules/nf-core/samtools/cat/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/cat: + - "modules/nf-core/samtools/cat/**" diff --git a/modules/nf-core/sortmerna/environment.yml b/modules/nf-core/sortmerna/environment.yml new file mode 100644 index 0000000..f40f995 --- /dev/null +++ b/modules/nf-core/sortmerna/environment.yml @@ -0,0 +1,7 @@ +name: sortmerna +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::sortmerna=4.3.6 diff --git a/modules/nf-core/sortmerna/main.nf b/modules/nf-core/sortmerna/main.nf index 5b4fbca..29c640c 100644 --- a/modules/nf-core/sortmerna/main.nf +++ b/modules/nf-core/sortmerna/main.nf @@ -1,11 +1,11 @@ process SORTMERNA { tag "$meta.id" - label "process_high" + label 'process_high' - conda "bioconda::sortmerna=4.3.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/sortmerna:4.3.4--h9ee0642_0' : - 'biocontainers/sortmerna:4.3.4--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/sortmerna:4.3.6--h9ee0642_0' : + 'biocontainers/sortmerna:4.3.6--h9ee0642_0' }" input: tuple val(meta), path(reads) diff --git a/modules/nf-core/sortmerna/meta.yml b/modules/nf-core/sortmerna/meta.yml index 66f00de..de0b18e 100644 --- a/modules/nf-core/sortmerna/meta.yml +++ b/modules/nf-core/sortmerna/meta.yml @@ -48,4 +48,6 @@ output: authors: - "@drpatelh" - "@mashehu" - - "@gallvp" +maintainers: + - "@drpatelh" + - "@mashehu" diff --git a/modules/nf-core/sortmerna/tests/main.nf.test b/modules/nf-core/sortmerna/tests/main.nf.test new file mode 100644 index 0000000..8a01e2a --- /dev/null +++ b/modules/nf-core/sortmerna/tests/main.nf.test @@ -0,0 +1,144 @@ +nextflow_process { + + name "Test Process SORTMERNA" + script "../main.nf" + process "SORTMERNA" + tag "modules" + tag "modules_nfcore" + tag "sortmerna" + + test("sarscov2 single_end") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reads }, + { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 100 (100.00)") }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 single_end stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ 
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 paired_end") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reads }, + { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 200 (100.00)") }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 paired_end stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + """ + } + 
} + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("sarscov2 paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/sortmerna/tests/main.nf.test.snap b/modules/nf-core/sortmerna/tests/main.nf.test.snap new file mode 100644 index 0000000..e502000 --- /dev/null +++ b/modules/nf-core/sortmerna/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "sarscov2 single_end-for_stub_match": { + "content": [ + [ + "test.non_rRNA.fastq.gz", + "test.sortmerna.log", + "{id=test, single_end=true}" + ] + ], + "timestamp": "2023-12-21T11:56:00.15356" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b" + ] + ], + "timestamp": "2023-12-21T11:56:00.200244" + }, + "sarscov2 paired_end-for_stub_match": { + "content": [ + [ + [ + "test_1.non_rRNA.fastq.gz", + "test_2.non_rRNA.fastq.gz" + ], + "test.sortmerna.log", + "{id=test, single_end=false}" + ] + ], + "timestamp": "2023-12-21T12:00:47.879193" + } +} \ No newline at end of file diff --git a/modules/nf-core/sortmerna/tests/tags.yml b/modules/nf-core/sortmerna/tests/tags.yml new file mode 100644 index 0000000..e088480 --- /dev/null +++ b/modules/nf-core/sortmerna/tests/tags.yml @@ -0,0 +1,2 @@ +sortmerna: + - modules/nf-core/sortmerna/** diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml new file mode 100644 index 0000000..36fcd02 --- /dev/null +++ b/modules/nf-core/star/align/environment.yml @@ -0,0 +1,9 @@ +name: star_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::star=2.7.10a + - bioconda::samtools=1.18 + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/align/main.nf 
b/modules/nf-core/star/align/main.nf index d0e2038..8e9c48b 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf @@ -2,10 +2,10 @@ process STAR_ALIGN { tag "$meta.id" label 'process_high' - conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : - 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" input: tuple val(meta), path(reads, stageAs: "input*/*") diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml index 3d8fed0..e80dbb7 100644 --- a/modules/nf-core/star/align/meta.yml +++ b/modules/nf-core/star/align/meta.yml @@ -52,7 +52,6 @@ input: - seq_center: type: string description: Sequencing center - output: - bam: type: file @@ -106,8 +105,11 @@ output: type: file description: STAR output bedGraph format file(s) (optional) pattern: "*.bg" - authors: - "@kevinmenden" - "@drpatelh" - "@praveenraj2018" +maintainers: + - "@kevinmenden" + - "@drpatelh" + - "@praveenraj2018" diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test new file mode 100644 index 0000000..4c87847 --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test @@ -0,0 +1,339 @@ +nextflow_process { + + name "Test Process STAR_ALIGN" + script "../main.nf" + process "STAR_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/align" + + 
test("homo_sapiens - single_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - single_end - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - single_end - log_out") }, + { assert snapshot(process.out.bam).match("homo_sapiens - single_end - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - single_end - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - single_end - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - single_end - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - single_end - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - single_end - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - single_end - junction") }, + { assert snapshot(process.out.log_progress).match("homo_sapiens - single_end - log_progress") }, + { assert 
snapshot(process.out.read_per_gene_tab).match("homo_sapiens - single_end - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - single_end - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - single_end - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - single_end - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - single_end - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - single_end - versions") } + ) + } + } + + test("homo_sapiens - paired_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - log_out") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - bam") }, + { assert 
snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - junction") }, + { assert snapshot(process.out.log_progress).match("homo_sapiens - paired_end - log_progress") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - versions") } + ) + } + } + + test("homo_sapiens - paired_end - arriba") { + config "./nextflow.arriba.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], 
checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - arriba - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - arriba - log_out") }, + { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - arriba - log_progress") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - arriba - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - arriba - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - arriba - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - arriba - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - arriba - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - arriba - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - arriba - junction") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - arriba - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - arriba - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - arriba - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - arriba - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - arriba - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - arriba - 
versions") } + ) + } + } + + test("homo_sapiens - paired_end - starfusion") { + config "./nextflow.starfusion.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_out") }, + { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_progress") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - starfusion - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - starfusion - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - starfusion - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - starfusion - bam_unsorted") }, + { assert 
snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - starfusion - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - starfusion - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - starfusion - junction") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - starfusion - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - starfusion - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - starfusion - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - starfusion - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - starfusion - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - starfusion - versions") } + ) + } + } + + test("homo_sapiens - paired_end - multiple") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = 
Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - multiple - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - multiple - log_out") }, + { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - multiple - log_progress") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - multiple - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - multiple - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - multiple - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - multiple - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - multiple - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - multiple - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - multiple - junction") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - multiple - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - multiple - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - multiple - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - multiple - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - multiple - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - multiple - versions") } + ) + } + } +} \ No newline at end of file diff 
--git a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap new file mode 100644 index 0000000..08edb91 --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test.snap @@ -0,0 +1,769 @@ +{ + "homo_sapiens - paired_end - multiple - bam_sorted": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + ] + ] + ], + "timestamp": "2023-12-04T18:01:19.968225733" + }, + "homo_sapiens - paired_end - multiple - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.857804" + }, + "homo_sapiens - paired_end - arriba - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ] + ], + "timestamp": "2023-12-04T17:56:12.347549723" + }, + "homo_sapiens - single_end - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.24701" + }, + "homo_sapiens - paired_end - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.383818" + }, + "homo_sapiens - paired_end - arriba - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T17:56:12.431212643" + }, + "homo_sapiens - paired_end - multiple - bedgraph": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ] + ], + "timestamp": "2023-12-04T18:01:20.07119229" + }, + "homo_sapiens - paired_end - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.368841" + }, + "homo_sapiens - paired_end - arriba - bedgraph": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.102537" + }, + "homo_sapiens - single_end - junction": { + "content": [ + [ + + ] + ], + 
"timestamp": "2023-11-23T13:22:55.185369" + }, + "homo_sapiens - paired_end - arriba - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ] + ], + "timestamp": "2023-12-04T17:56:12.268388251" + }, + "homo_sapiens - single_end - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.216183" + }, + "homo_sapiens - paired_end - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.327236" + }, + "homo_sapiens - single_end - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T17:53:26.664210196" + }, + "homo_sapiens - paired_end - multiple - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:29:01.022176" + }, + "homo_sapiens - paired_end - arriba - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.15277" + }, + "homo_sapiens - paired_end - multiple - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.52923" + }, + "homo_sapiens - paired_end - multiple - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ] + ], + "timestamp": "2023-12-04T18:01:20.189486201" + }, + "homo_sapiens - paired_end - starfusion - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:27:55.905883" + }, + "homo_sapiens - paired_end - starfusion - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.192302" + }, + "homo_sapiens - paired_end - multiple - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.661837" + }, + "homo_sapiens - paired_end - multiple - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:29:00.966417" + }, + "homo_sapiens - paired_end - starfusion - bam": { + "content": [ + [ + [ 
+ { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,bcad07b838f6762fc01eea52b5cd3f84" + ] + ] + ], + "timestamp": "2023-12-04T17:59:58.53235164" + }, + "homo_sapiens - paired_end - arriba - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.202776" + }, + "homo_sapiens - single_end - bedgraph": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f" + ] + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.394863748" + }, + "homo_sapiens - paired_end - arriba - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.251962" + }, + "homo_sapiens - paired_end - starfusion - bam_sorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.040843" + }, + "homo_sapiens - single_end - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.154172" + }, + "homo_sapiens - paired_end - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" + ] + ] + ], + "timestamp": "2023-12-04T17:54:11.934832258" + }, + "homo_sapiens - paired_end - arriba - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:06.998817" + }, + "homo_sapiens - paired_end - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:23:33.259699" + }, + "homo_sapiens - paired_end - arriba - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:25:06.849451" + }, + "homo_sapiens - paired_end - multiple - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T18:01:20.393705142" + }, + "homo_sapiens - paired_end - starfusion - bam_transcript": { + "content": [ + [ + + ] + ], + 
"timestamp": "2023-11-23T13:27:56.082408" + }, + "homo_sapiens - paired_end - starfusion - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ] + ], + "timestamp": "2023-12-04T17:59:58.818041322" + }, + "homo_sapiens - single_end - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.175307" + }, + "homo_sapiens - paired_end - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.255481058" + }, + "homo_sapiens - paired_end - starfusion - bedgraph": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.155413" + }, + "homo_sapiens - single_end - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.144852" + }, + "homo_sapiens - paired_end - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T17:54:12.343840482" + }, + "homo_sapiens - paired_end - multiple - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ] + ], + "timestamp": "2023-12-04T18:01:20.291692062" + }, + "homo_sapiens - single_end - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.265642675" + }, + "homo_sapiens - paired_end - arriba - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.444214" + }, + "homo_sapiens - paired_end - log_progress": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8" + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.126063825" + }, + "homo_sapiens - paired_end - arriba - 
log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:25:06.829799" + }, + "homo_sapiens - paired_end - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.300509" + }, + "homo_sapiens - paired_end - arriba - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.300383" + }, + "homo_sapiens - paired_end - multiple - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + ] + ] + ], + "timestamp": "2023-12-04T18:01:19.851247126" + }, + "homo_sapiens - paired_end - multiple - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.462257" + }, + "homo_sapiens - single_end - bam_sorted": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.335457371" + }, + "homo_sapiens - paired_end - arriba - bam_sorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:06.94699" + }, + "homo_sapiens - paired_end - starfusion - junction": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,c10ef219f4a30e83711b995bc5e40dba" + ] + ] + ], + "timestamp": "2023-12-04T17:59:58.641115828" + }, + "homo_sapiens - single_end - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.580593434" + }, + "homo_sapiens - paired_end - starfusion - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T17:59:58.907317103" + }, + "homo_sapiens - paired_end - multiple - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.330463" + }, + "homo_sapiens - paired_end - arriba - 
log_progress": { + "content": [ + "test.Log.progress.out" + ], + "timestamp": "2023-11-23T13:25:06.86866" + }, + "homo_sapiens - paired_end - bedgraph": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.064121304" + }, + "homo_sapiens - paired_end - starfusion - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.118974" + }, + "homo_sapiens - paired_end - starfusion - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.264699" + }, + "homo_sapiens - paired_end - multiple - log_progress": { + "content": [ + "test.Log.progress.out" + ], + "timestamp": "2023-11-23T13:29:01.076947" + }, + "homo_sapiens - paired_end - arriba - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.050409" + }, + "homo_sapiens - paired_end - bam_sorted": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.002180537" + }, + "homo_sapiens - single_end - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.50932751" + }, + "homo_sapiens - paired_end - starfusion - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ] + ], + "timestamp": "2023-12-04T17:59:58.731699486" + }, + "homo_sapiens - single_end - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:22:55.126286" + }, + "homo_sapiens - paired_end - log_final": { + "content": [ + "test.Log.final.out" + ], + 
"timestamp": "2023-11-23T13:23:33.253884" + }, + "homo_sapiens - single_end - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:22:55.11799" + }, + "homo_sapiens - paired_end - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.287684" + }, + "homo_sapiens - paired_end - starfusion - log_progress": { + "content": [ + "test.Log.progress.out" + ], + "timestamp": "2023-11-23T13:27:55.971484" + }, + "homo_sapiens - paired_end - multiple - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.264176" + }, + "homo_sapiens - paired_end - multiple - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.596406" + }, + "homo_sapiens - single_end - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.205936" + }, + "homo_sapiens - paired_end - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.340653" + }, + "homo_sapiens - paired_end - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.185730856" + }, + "homo_sapiens - paired_end - starfusion - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.300637" + }, + "homo_sapiens - paired_end - arriba - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,c1b1747f5873f2d17762725636e891d5" + ] + ] + ], + "timestamp": "2023-12-04T17:56:12.190560178" + }, + "homo_sapiens - single_end - log_progress": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.450352138" + }, + "homo_sapiens - paired_end - starfusion - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.422018" + }, + 
"homo_sapiens - paired_end - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.429457" + }, + "homo_sapiens - paired_end - starfusion - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:27:55.93945" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/align/tests/nextflow.arriba.config b/modules/nf-core/star/align/tests/nextflow.arriba.config new file mode 100644 index 0000000..2324b9e --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.arriba.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/nextflow.config b/modules/nf-core/star/align/tests/nextflow.config new file mode 100644 index 0000000..c4ac580 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --outWigType bedGraph --outWigStrand Unstranded' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/nextflow.starfusion.config b/modules/nf-core/star/align/tests/nextflow.starfusion.config new file mode 100644 
index 0000000..467b649 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.starfusion.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outReadsUnmapped None --twopassMode Basic --outSAMstrandField intronMotif --outSAMunmapped Within --chimSegmentMin 12 --chimJunctionOverhangMin 8 --chimOutJunctionFormat 1 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --alignSJstitchMismatchNmax 5 -1 5 5 --chimMultimapScoreRange 3 --chimScoreJunctionNonGTAG -4 --chimMultimapNmax 20 --chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --alignInsertionFlush Right --alignSplicedMateMapLminOverLmate 0 --alignSplicedMateMapLmin 30' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/tags.yml b/modules/nf-core/star/align/tests/tags.yml new file mode 100644 index 0000000..8beace1 --- /dev/null +++ b/modules/nf-core/star/align/tests/tags.yml @@ -0,0 +1,2 @@ +star/align: + - modules/nf-core/star/align/** diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml new file mode 100644 index 0000000..93e4476 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -0,0 +1,11 @@ +name: star_genomegenerate + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bioconda::samtools=1.18 + - bioconda::star=2.7.10a + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf index ed32d7c..b885571 100644 --- a/modules/nf-core/star/genomegenerate/main.nf +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -2,15 +2,14 @@ process STAR_GENOMEGENERATE { tag "$fasta" label 'process_high' - conda 
"bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : - 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" input: tuple val(meta), path(fasta) tuple val(meta2), path(gtf) - val star_ignore_sjdbgtf output: tuple val(meta), path("star") , emit: index @@ -20,10 +19,10 @@ process STAR_GENOMEGENERATE { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args_list = args.tokenize() - def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' - def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def include_gtf = gtf ? 
"--sjdbGTFfile $gtf" : '' if (args_list.contains('--genomeSAindexNbases')) { """ mkdir star @@ -31,7 +30,7 @@ process STAR_GENOMEGENERATE { --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ - $ignore_gtf \\ + $include_gtf \\ --runThreadN $task.cpus \\ $memory \\ $args @@ -53,7 +52,7 @@ process STAR_GENOMEGENERATE { --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ - $ignore_gtf \\ + $include_gtf \\ --runThreadN $task.cpus \\ --genomeSAindexNbases \$NUM_BASES \\ $memory \\ @@ -69,30 +68,52 @@ process STAR_GENOMEGENERATE { } stub: - """ - mkdir star - touch star/Genome - touch star/Log.out - touch star/SA - touch star/SAindex - touch star/chrLength.txt - touch star/chrName.txt - touch star/chrNameLength.txt - touch star/chrStart.txt - touch star/exonGeTrInfo.tab - touch star/exonInfo.tab - touch star/geneInfo.tab - touch star/genomeParameters.txt - touch star/sjdbInfo.txt - touch star/sjdbList.fromGTF.out.tab - touch star/sjdbList.out.tab - touch star/transcriptInfo.tab + if (gtf) { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') - END_VERSIONS - """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: 
\$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } } diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml index e824dbf..1061e1b 100644 --- a/modules/nf-core/star/genomegenerate/meta.yml +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -31,7 +31,6 @@ input: - gtf: type: file description: GTF file of the reference genome - output: - meta: type: map @@ -46,8 +45,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@kevinmenden" - "@drpatelh" - - "@gallvp" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test new file mode 100644 index 0000000..af0c942 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -0,0 +1,117 @@ +nextflow_process { + + name "Test Process STAR_GENOMEGENERATE" + script "../main.nf" + process "STAR_GENOMEGENERATE" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/genomegenerate" + + test("homo_sapiens") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: 
true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-without_gtf") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-without_gtf-stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + 
) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap new file mode 100644 index 0000000..9de08c7 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -0,0 +1,22 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "timestamp": "2023-12-19T11:05:51.741109" + }, + "index_with_gtf": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" + ], + "timestamp": "2023-12-19T11:38:14.551548" + }, + "index_without_gtf": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" + ], + "timestamp": "2023-12-19T11:38:22.382905" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml new file mode 100644 index 0000000..79f619b --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/tags.yml @@ -0,0 +1,2 @@ +star/genomegenerate: + - modules/nf-core/star/genomegenerate/** diff --git a/modules/nf-core/star/starsolo/main.nf b/modules/nf-core/star/starsolo/main.nf deleted file mode 100644 index 07499b6..0000000 --- a/modules/nf-core/star/starsolo/main.nf +++ /dev/null @@ -1,94 +0,0 @@ -process STARSOLO { - tag "$meta.id" - label 'process_high' - - conda "bioconda::star=2.7.10b" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/star:2.7.10b--h9ee0642_0': - 'biocontainers/star:2.7.10b--h9ee0642_0' }" - - input: - tuple val(meta), val(solotype), path(reads) - tuple val(meta2), path(index) - - output: - tuple val(meta), path('*.Solo.out') , emit: counts - tuple val(meta), path('*Log.final.out') , emit: log_final - tuple val(meta), path('*Log.out') , emit: log_out - tuple val(meta), path('*Log.progress.out') , emit: log_progress - tuple val(meta), path('*/Gene/Summary.csv') , emit: summary - path "versions.yml" , emit: versions - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def (forward, reverse) = reads.collate(2).transpose() - def zcat = reads[0].getExtension() == "gz" ? "--readFilesCommand zcat": "" - - // Handle solotype argument logic - switch(solotype) { - case "CB_UMI_Simple": - solotype_args = meta.umi_len ? "--soloUMIlen ${meta.umi_len} " : ""; - solotype_args = solotype_args + (meta.whitelist ? "--soloCBwhitelist ${meta.whitelist} " : "--soloCBwhitelist None "); - solotype_args = solotype_args + (meta.umi_start ? "--soloUMIstart ${meta.umi_start} " : ""); - solotype_args = solotype_args + (meta.cb_len ? "--soloCBlen ${meta.cb_len} " : ""); - solotype_args = solotype_args + (meta.cb_start ? "--soloCBstart ${meta.cb_start} " : ""); - solotype_args = solotype_args + (meta.barcode_len ? "--soloBarcodeReadLength ${meta.barcode_len} " : ""); - solotype_args = solotype_args + (meta.barcode_mate ? "--soloBarcodeMate ${meta.barcode_mate} " : ""); - break - case "CB_UMI_Complex": - solotype_args = meta.cb_position ? "--soloCBposition ${meta.cb_position}" : ""; - solotype_args = solotype_args + (meta.whitelist ? "--soloCBwhitelist ${meta.whitelist} " : "--soloCBwhitelist None "); - solotype_args = solotype_args + (meta.umi_position ? "--soloUMIposition ${meta.umi_position} " : ""); - solotype_args = solotype_args + (meta.adapter_seq ? 
"--soloAdapterSequence ${meta.adapter_seq} " : ""); - solotype_args = solotype_args + (meta.max_mismatch_adapter ? "--soloAdapterMismatchesNmax ${meta.max_mismatch_adapter} " : ""); - break - case "SmartSeq": - solotype_args = "--soloUMIdedup Exact "; - solotype_args = solotype_args + (meta.strandedness ? "--soloStrand ${meta.strandedness} " : ""); - solotype_args = solotype_args + "--outSAMattrRGline ID:${prefix} "; - break - default: - log.warn("Unknown output solotype (${solotype})"); - break - } - - """ - STAR \\ - --genomeDir $index \\ - --readFilesIn ${reverse.join( "," )} ${forward.join( "," )} \\ - --runThreadN $task.cpus \\ - --outFileNamePrefix $prefix. \\ - --soloType $solotype \\ - $zcat \\ - $solotype_args \\ - $args - - if [ -d ${prefix}.Solo.out ]; then - find ${prefix}.Solo.out \\( -name "*.tsv" -o -name "*.mtx" \\) -exec gzip {} \\; - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - mkdir ${prefix}.Solo.out/ - touch ${prefix}.Solo.out/Log.final.out - touch ${prefix}.Solo.out/Log.out - touch ${prefix}.Solo.out/Log.progress.out - touch ${prefix}.Solo.out/Summary.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/star/starsolo/meta.yml b/modules/nf-core/star/starsolo/meta.yml deleted file mode 100644 index 4fce56c..0000000 --- a/modules/nf-core/star/starsolo/meta.yml +++ /dev/null @@ -1,79 +0,0 @@ -name: "starsolo" -description: Create a counts matrix for single-cell data using STARSolo, handling cell barcodes and UMI information. -keywords: - - align - - count - - genome - - reference -tools: - - "starsolo": - description: "Mapping, demultiplexing and quantification for single cell RNA-seq." 
- homepage: "https://github.com/alexdobin/STAR/" - documentation: "https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md" - doi: "10.1101/2021.05.05.442755" - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information. - Here, you should add all the specific barcode/umi - information for each sample. - e.g. `[ id:'test_starsolo', umi_len:'12', cb_start:1 ]` - - solotype: - type: string - description: | - Type of single-cell library. - It can be CB_UMI_Simple for most common ones such as 10xv2 and 10xv3, - CB_UMI_Complex for method such as inDrop and SmartSeq for SMART-Seq. - - meta2: - type: map - description: Groovy Map containing the STAR index information. - - index: - type: directory - description: STAR genome index - pattern: "star" - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - meta: - type: map - description: | - Groovy Map containing sample information. - Here, you should add all the specific barcode/umi - information for each sample. - e.g. `[ id:'test_starsolo', umi_len:'12', cb_start:1 ]` - - log_final: - type: file - description: STAR final log file - pattern: "*Log.final.out" - - log_out: - type: file - description: STAR lot out file - pattern: "*Log.out" - - log_progress: - type: file - description: STAR log progress file - pattern: "*Log.progress.out" - - summary: - type: file - description: STARSolo metrics summary CSV file. 
- pattern: "*/Gene/Summary.csv" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@kevinmenden" - - "@ggabernet" - - "@grst" - - "@fmalmeida" - - "@rhreynolds" - - "@apeltzer" - - "@vivian-chen16" - - "@maxulysse" - - "@joaodemeirelles" diff --git a/modules/nf-core/trinity/main.nf b/modules/nf-core/trinity/main.nf deleted file mode 100644 index 3960a35..0000000 --- a/modules/nf-core/trinity/main.nf +++ /dev/null @@ -1,74 +0,0 @@ -process TRINITY { - tag "$meta.id" - label 'process_high_memory' - - conda "bioconda::trinity=2.13.2" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/trinity:2.13.2--h00214ad_1': - 'biocontainers/trinity:2.13.2--h00214ad_1' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.fa.gz") , emit: transcript_fasta - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - if (meta.single_end) { - reads_args = "--single ${reads}" - } else { - reads_args = "--left ${reads[0]} --right ${reads[1]}" - } - - // --seqType argument, fasta or fastq. Exact pattern match .fasta or .fa suffix with optional .gz (gzip) suffix - seqType_args = reads[0] ==~ /(.*fasta(.gz)?$)|(.*fa(.gz)?$)/ ? "fa" : "fq" - - // Define the memory requirements. Trinity needs this as an option. - def avail_mem = 7 - if (!task.memory) { - log.info '[Trinity] Available memory not known - defaulting to 7GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = (task.memory.giga*0.8).intValue() - } - - """ - # Note that Trinity needs the word 'trinity' in the outdir - - Trinity \\ - --seqType ${seqType_args} \\ - --max_memory ${avail_mem}G \\ - ${reads_args} \\ - --output ${prefix}_trinity \\ - --CPU $task.cpus \\ - $args - - gzip -cf ${prefix}_trinity.Trinity.fasta > ${prefix}.fa.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trinity: \$(echo \$(Trinity --version | head -n 1 2>&1) | sed 's/^Trinity version: Trinity-v//' )) - END_VERSIONS - - # Need to only take the first line of --version since it will warn about not being up-to-date and this messes up the version.yaml. - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.fa.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trinity: \$(echo \$(Trinity --version | head -n 1 2>&1) | sed 's/^Trinity version: Trinity-v//' )) - END_VERSIONS - - # Need to only take the first line of --version since it will warn about not being up-to-date and this messes up the version.yaml. - """ -} diff --git a/modules/nf-core/trinity/meta.yml b/modules/nf-core/trinity/meta.yml deleted file mode 100644 index 26e8c10..0000000 --- a/modules/nf-core/trinity/meta.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: "trinity" -description: Assembles a de novo transcriptome from RNAseq reads -keywords: - - assembly - - de novo assembler - - fasta - - fastq -tools: - - "trinity": - description: "Trinity assembles transcript sequences from Illumina RNA-Seq data." - homepage: "https://github.com/trinityrnaseq/trinityrnaseq/wiki" - documentation: "https://github.com/trinityrnaseq/trinityrnaseq/wiki" - tool_dev_url: "https://github.com/trinityrnaseq/trinityrnaseq/" - doi: "10.1038/nbt.1883" - licence: "['BSD-3-clause']" - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - reads: - type: file - description: fasta/fastq file of reads to be assembled into a transcriptome - pattern: "*.{fa|fasta|fq|fastq}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - transcript_fasta: - type: file - description: de novo assembled transcripts fasta file compressed - pattern: "*.fa.gz" - -authors: - - "@timslittle" - - "@gallvp" diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf deleted file mode 100644 index 56ea046..0000000 --- a/modules/nf-core/umitools/dedup/main.nf +++ /dev/null @@ -1,62 +0,0 @@ -process UMITOOLS_DEDUP { - tag "$meta.id" - label "process_medium" - - conda "bioconda::umi_tools=1.1.4" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : - 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" - - input: - tuple val(meta), path(bam), path(bai) - val get_output_stats - - output: - tuple val(meta), path("${prefix}.bam") , emit: bam - tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("*edit_distance.tsv"), optional:true, emit: tsv_edit_distance - tuple val(meta), path("*per_umi.tsv") , optional:true, emit: tsv_per_umi - tuple val(meta), path("*per_position.tsv") , optional:true, emit: tsv_umi_per_position - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def paired = meta.single_end ? "" : "--paired" - stats = get_output_stats ? "--output-stats ${prefix}" : "" - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
- - if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"} - """ - PYTHONHASHSEED=0 umi_tools \\ - dedup \\ - -I $bam \\ - -S ${prefix}.bam \\ - -L ${prefix}.log \\ - $stats \\ - $paired \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') - END_VERSIONS - """ - - stub: - """ - touch ${prefix}.bam - touch ${prefix}.log - touch ${prefix}_edit_distance.tsv - touch ${prefix}_per_umi.tsv - touch ${prefix}_per_position.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml deleted file mode 100644 index 534d4c6..0000000 --- a/modules/nf-core/umitools/dedup/meta.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: umitools_dedup -description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. -keywords: - - umitools - - deduplication - - dedup -tools: - - umi_tools: - description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes - documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: | - BAM file containing reads to be deduplicated via UMIs. - pattern: "*.{bam}" - - bai: - type: file - description: | - BAM index files corresponding to the input BAM file. - pattern: "*.{bai}" - - get_output_stats: - type: boolean - description: | - Whether or not to generate output stats. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: BAM file with deduplicated UMIs. - pattern: "*.{bam}" - - log: - type: file - description: File with logging information - pattern: "*.{log}" - - tsv_edit_distance: - type: file - description: Reports the (binned) average edit distance between the UMIs at each position. - pattern: "*edit_distance.tsv" - - tsv_per_umi: - type: file - description: UMI-level summary statistics. - pattern: "*per_umi.tsv" - - tsv_umi_per_position: - type: file - description: Tabulates the counts for unique combinations of UMI and position. - pattern: "*per_position.tsv" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" - - "@klkeys" diff --git a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml new file mode 100644 index 0000000..7d08ac0 --- /dev/null +++ b/modules/nf-core/umitools/extract/environment.yml @@ -0,0 +1,7 @@ +name: umitools_extract +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::umi_tools=1.1.4 diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf index 2f94fa9..4bd79e7 100644 --- a/modules/nf-core/umitools/extract/main.nf +++ b/modules/nf-core/umitools/extract/main.nf @@ -3,7 +3,7 @@ process UMITOOLS_EXTRACT { label "process_single" label "process_long" - conda "bioconda::umi_tools=1.1.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" @@ -33,7 +33,7 @@ process UMITOOLS_EXTRACT { cat <<-END_VERSIONS > versions.yml "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) END_VERSIONS """ } else { @@ -49,7 +49,7 @@ process UMITOOLS_EXTRACT { cat <<-END_VERSIONS > versions.yml "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) END_VERSIONS """ } diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml index db64a0f..7695b27 100644 --- a/modules/nf-core/umitools/extract/meta.yml +++ b/modules/nf-core/umitools/extract/meta.yml @@ -1,15 +1,16 @@ name: umitools_extract description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place keywords: - - umitools + - UMI + - barcode - extract + - umitools tools: - umi_tools: description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] + license: "MIT" input: - meta: type: map @@ -29,9 +30,7 @@ output: - reads: type: file description: > - Extracted FASTQ files. | - For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | - For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. 
pattern: "*.{fastq.gz}" - log: type: file @@ -41,7 +40,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test new file mode 100644 index 0000000..22242d1 --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process UMITOOLS_EXTRACT" + script "../main.nf" + process "UMITOOLS_EXTRACT" + config "./nextflow.config" + tag "modules_nfcore" + tag "modules" + tag "umitools" + tag "umitools/extract" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap new file mode 100644 index 0000000..6d5944f --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,5a18da2d3a5a4de15e7aaae9082d7abb" + ] + ], + "timestamp": "2023-12-08T09:41:43.540658352" + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config new file mode 100644 index 0000000..628f5fc --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: UMITOOLS_EXTRACT 
{ + ext.args = '--bc-pattern="NNNN"' + } + +} diff --git a/modules/nf-core/umitools/extract/tests/tags.yml b/modules/nf-core/umitools/extract/tests/tags.yml new file mode 100644 index 0000000..c3fb23d --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/tags.yml @@ -0,0 +1,2 @@ +umitools/extract: + - modules/nf-core/umitools/extract/** diff --git a/modules/nf-core/umitools/group/main.nf b/modules/nf-core/umitools/group/main.nf deleted file mode 100644 index 9a6370b..0000000 --- a/modules/nf-core/umitools/group/main.nf +++ /dev/null @@ -1,62 +0,0 @@ -process UMITOOLS_GROUP { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::umi_tools=1.1.4" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : - 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" - - input: - tuple val(meta), path(bam), path(bai) - val create_bam - val get_group_info - - output: - tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("${prefix}.bam"), optional: true, emit: bam - tuple val(meta), path("*.tsv") , optional: true, emit: tsv - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def paired = meta.single_end ? "" : "--paired" - output_bam = create_bam ? "--output-bam -S ${prefix}.bam" : "" - group_info = get_group_info ? "--group-out ${prefix}.tsv" : "" - - if (create_bam && "$bam" == "${prefix}.bam") { error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
} - - if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"} - """ - PYTHONHASHSEED=0 umi_tools \\ - group \\ - -I $bam \\ - $output_bam \\ - -L ${prefix}.log \\ - $group_info \\ - $paired \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') - END_VERSIONS - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.bam - touch ${prefix}.log - touch ${prefix}.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/umitools/group/meta.yml b/modules/nf-core/umitools/group/meta.yml deleted file mode 100644 index 1fa826d..0000000 --- a/modules/nf-core/umitools/group/meta.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: umitools_group -description: Group reads based on their UMI and mapping coordinates -keywords: - - umitools - - umi - - deduplication - - dedup - - clustering -tools: - - umi_tools: - description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes - documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: | - BAM file containing reads to be deduplicated via UMIs. - pattern: "*.{bam}" - - bai: - type: file - description: | - BAM index files corresponding to the input BAM file. - pattern: "*.{bai}" - - create_bam: - type: boolean - description: | - Whether or not to create a read group tagged BAM file. 
- - get_group_info: - type: boolean - description: | - Whether or not to generate the flatfile describing the read groups, see docs for complete info of all columns - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: a read group tagged BAM file. - pattern: "${prefix}.{bam}" - - log: - type: file - description: File with logging information - pattern: "*.{log}" - - tsv: - type: file - description: Flatfile describing the read groups, see docs for complete info of all columns - pattern: "*.{tsv}" - -authors: - - "@Joon-Klaps" diff --git a/modules/pfr/custom/restoregffids/environment.yml b/modules/pfr/custom/restoregffids/environment.yml new file mode 100644 index 0000000..2450c45 --- /dev/null +++ b/modules/pfr/custom/restoregffids/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "custom_restoregffids" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "python=3.10.2" diff --git a/modules/pfr/custom/restoregffids/main.nf b/modules/pfr/custom/restoregffids/main.nf new file mode 100644 index 0000000..14e2c07 --- /dev/null +++ b/modules/pfr/custom/restoregffids/main.nf @@ -0,0 +1,35 @@ +process CUSTOM_RESTOREGFFIDS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.10.2': + 'biocontainers/python:3.10.2' }" + + input: + tuple val(meta), path(gff3) + path(ids_tsv) + + output: + tuple val(meta), path("*.restored.ids.gff3") , emit: restored_ids_gff3 + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + template 'restore_gff_ids.py' + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}.restored.ids.gff3" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/pfr/custom/restoregffids/meta.yml b/modules/pfr/custom/restoregffids/meta.yml new file mode 100644 index 0000000..4e42b82 --- /dev/null +++ b/modules/pfr/custom/restoregffids/meta.yml @@ -0,0 +1,58 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "custom_restoregffids" +description: | + Restores IDs in a gff3 file based on a TSV table + consisting of original (first column) and new IDs (second column). + This module is helpful when some tools like EDTA implicitly shorten + the IDs without producing the ID map, leading to downstream mismatch + in IDs across files. +keywords: + - genome + - gff + - ID + - shorten + - restore +tools: + - "python": + description: | + Python is a programming language that lets you work quickly + and integrate systems more effectively + homepage: "https://www.python.org" + documentation: "https://docs.python.org/3/" + tool_dev_url: "https://github.com/python/cpython" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test' ]` + - gff3: + type: file + description: Input gff3 file + pattern: "*.{gff,gff3}" + - ids_tsv: + type: file + description: | + A TSV file with original (first column) and new ids (second column) + if id change was required + pattern: "*.tsv" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - restored_ids_gff3: + type: file + description: GFF3 file with restored ids + pattern: "*.restored.ids.gff3" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py b/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py new file mode 100755 index 0000000..d0699de --- /dev/null +++ b/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 + +from platform import python_version + +ids_tsv = "$ids_tsv" +input_gff3 = "$gff3" +output_prefix = "$prefix" + + +def create_name_mapping_from_tsv(file_path): + dictionary = {} + + with open(file_path, "r") as tsv_file: + for line in tsv_file: + columns = line.strip().split("\\t") + if len(columns) != 2: + raise ValueError(f"{file_path} should be a two column TSV file") + + orig_id, new_id = columns[0], columns[1] + dictionary[new_id] = orig_id + + return dictionary + + +def restore_gff3_ids(new_to_orig_ids, file_path, output_file_name): + # Write versions + with open(f"versions.yml", "w") as f_versions: + f_versions.write('"${task.process}":\\n') + f_versions.write(f" python: {python_version()}\\n") + + with open(file_path, "r") as input_gff3_file: + input_lines = input_gff3_file.readlines() + + with open(output_file_name, "w") as output_gff_file: + for line in input_lines: + if line.startswith("##"): + output_gff_file.write(line) + continue + + new_id = line.split("\\t")[0] + orig_id = new_to_orig_ids[new_id] + 
output_gff_file.write("\\t".join([orig_id] + line.split("\\t")[1:])) + + +if __name__ == "__main__": + new_to_orig_ids = create_name_mapping_from_tsv(ids_tsv) + restore_gff3_ids(new_to_orig_ids, input_gff3, f"{output_prefix}.restored.ids.gff3") diff --git a/modules/pfr/custom/restoregffids/tests/main.nf.test b/modules/pfr/custom/restoregffids/tests/main.nf.test new file mode 100644 index 0000000..521b924 --- /dev/null +++ b/modules/pfr/custom/restoregffids/tests/main.nf.test @@ -0,0 +1,63 @@ +nextflow_process { + + name "Test Process CUSTOM_RESTOREGFFIDS" + script "../main.nf" + process "CUSTOM_RESTOREGFFIDS" + + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "custom/restoregffids" + + test("sarscov2-genome_gff3-success") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + ] + input[1] = Channel.of('Chr1\tMT192765.1').collectFile(name: 'id_map.tsv', newLine: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert path(process.out.restored_ids_gff3.get(0).get(1)).getText().contains("Chr1") }, + { assert !path(process.out.restored_ids_gff3.get(0).get(1)).getText().contains("MT192765.1") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + ] + input[1] = Channel.of('Chr1\tMT192765.1').collectFile(name: 'id_map.tsv', newLine: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.restored_ids_gff3 != null }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/pfr/custom/restoregffids/tests/main.nf.test.snap 
b/modules/pfr/custom/restoregffids/tests/main.nf.test.snap new file mode 100644 index 0000000..ffe43e7 --- /dev/null +++ b/modules/pfr/custom/restoregffids/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "sarscov2-genome_gff3-success": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.restored.ids.gff3:md5,2c294938b9eb4e52d19e14725c1d92a9" + ] + ], + "1": [ + "versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e" + ], + "restored_ids_gff3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.restored.ids.gff3:md5,2c294938b9eb4e52d19e14725c1d92a9" + ] + ], + "versions": [ + "versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e" + ] + } + ], + "timestamp": "2023-12-07T13:49:30.047425" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e" + ] + ], + "timestamp": "2023-12-07T13:49:30.071175" + } +} \ No newline at end of file diff --git a/modules/pfr/custom/restoregffids/tests/tags.yml b/modules/pfr/custom/restoregffids/tests/tags.yml new file mode 100644 index 0000000..1d4b9a8 --- /dev/null +++ b/modules/pfr/custom/restoregffids/tests/tags.yml @@ -0,0 +1,2 @@ +custom/restoregffids: + - "modules/pfr/custom/restoregffids/**" diff --git a/modules/pfr/custom/shortenfastaids/environment.yml b/modules/pfr/custom/shortenfastaids/environment.yml new file mode 100644 index 0000000..e80fa7c --- /dev/null +++ b/modules/pfr/custom/shortenfastaids/environment.yml @@ -0,0 +1,11 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "custom_shortenfastaids" +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - biopython==1.75 + - python=3.8 diff --git a/modules/pfr/custom/shortenfastaids/main.nf b/modules/pfr/custom/shortenfastaids/main.nf new file mode 100644 index 0000000..92762ef --- /dev/null +++ b/modules/pfr/custom/shortenfastaids/main.nf @@ -0,0 +1,34 @@ +process CUSTOM_SHORTENFASTAIDS { + tag 
"$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/biopython:1.75': + 'biocontainers/biopython:1.75' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.short.ids.fasta") , emit: short_ids_fasta , optional: true + tuple val(meta), path("*.short.ids.tsv") , emit: short_ids_tsv , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + template 'shorten_fasta_ids.py' + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | cut -d' ' -f2) + biopython: \$(pip list | grep "biopython" | cut -d' ' -f3) + END_VERSIONS + """ +} diff --git a/modules/pfr/custom/shortenfastaids/meta.yml b/modules/pfr/custom/shortenfastaids/meta.yml new file mode 100644 index 0000000..2425810 --- /dev/null +++ b/modules/pfr/custom/shortenfastaids/meta.yml @@ -0,0 +1,58 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "custom_shortenfastaids" +description: | + Shortens fasta IDs and produces a new fasta along with a TSV table + consisting of original (first column) and new IDs (second column). + This module is helpful when some tools like EDTA implicitly shorten + the IDs without producing the ID map, leading to downstream mismatch + in IDs across files. +keywords: + - genome + - fasta + - ID + - shorten +tools: + - "biopython": + description: | + Biopython is a set of freely available tools for biological computation written in Python by + an international team of developers. 
+ homepage: "https://biopython.org" + documentation: "https://biopython.org/wiki/Documentation" + tool_dev_url: "https://github.com/biopython/biopython" + doi: "10.1093/bioinformatics/btp163" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - fasta: + type: file + description: Input fasta file + pattern: "*.{fsa,fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - short_ids_fasta: + type: file + description: Fasta file with shortened ids if id change is required + pattern: "*.{fsa,fa,fasta}" + - short_ids_tsv: + type: file + description: | + A TSV file with original (first column) and new ids (second column) + if id change is required + pattern: "*.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids_c97537f.py b/modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py similarity index 55% rename from modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids_c97537f.py rename to modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py index e5b62b3..54f35bf 100755 --- a/modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids_c97537f.py +++ b/modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py @@ -1,29 +1,22 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import re -import sys from Bio import SeqIO - -# https://github.com/Plant-Food-Research-Open/assembly_qc -# GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE +from importlib.metadata import version +from platform import python_version # The input fasta file path -fasta_file_path = sys.argv[1] - -# The prefix for output files: prefix.renamed.ids.fa, prefix.renamed.ids.tsv 
-output_files_prefix = sys.argv[2] - -# In the case where IDs have acceptable character and no change is needed, the output is stdout: -# "IDs have acceptable length and character. No change required." +fasta_file_path = "$fasta" +output_files_prefix = "$prefix" -def extract_fasta_ids(fasta_file_path): +def extract_fasta_ids_and_descriptions(fasta_file_path): fasta_file_obj = SeqIO.parse(fasta_file_path, "fasta") ids = [] for record in fasta_file_obj: - ids.append(record.id) + ids.append((record.id, record.description)) return ids @@ -41,29 +34,39 @@ def write_fasta_with_new_ids(fasta_file_path, id_mapping, file_prefix): replaced_records.append(record) - SeqIO.write(replaced_records, f"{file_prefix}.renamed.ids.fa", "fasta") + SeqIO.write(replaced_records, f"{file_prefix}.short.ids.fasta", "fasta") -def write_fasta_without_comments(fasta_file_path, file_prefix): - old_fasta_file_obj = SeqIO.parse(fasta_file_path, "fasta") - - replaced_records = [] - for record in old_fasta_file_obj: - record.description = "" - replaced_records.append(record) - - SeqIO.write(replaced_records, f"{file_prefix}.renamed.ids.fa", "fasta") +def do_id_need_to_change(id_and_description, silent=False): + id = id_and_description[0] + description = id_and_description[1] + if len(id) > 13: + if not silent: + print(f"{id} has length greater than 13") + return True + if not re.match(r"^[a-zA-Z0-9_]+\$", id): + if not silent: + print(f"{id} does not match '^[a-zA-Z0-9_]+\$'") + return True -def do_id_need_to_change(id): - if len(id) > 13 or not re.match(r"^[a-zA-Z0-9_]+$", id): + if description != id and description != "": + if not silent: + print(f"{id} contains a comment: {description.replace(id, '')}") return True + if not silent: + print(f"{id} is acceptable") return False -def do_ids_need_to_change(ids): - return any([do_id_need_to_change(id) for id in ids]) +def do_ids_need_to_change(ids_and_descriptions, silent=False): + return any( + [ + do_id_need_to_change(id_and_description, silent) + 
for id_and_description in ids_and_descriptions + ] + ) def extract_common_patterns(ids): @@ -83,23 +86,25 @@ def extract_common_patterns(ids): return {pattern: pattern[:3] for pattern in common_patterns} -def shorten_ids(ids, patterns_dict): +def shorten_ids(input_ids_and_descriptions, patterns_dict): shortened_ids = [] - for id in ids: - if not do_id_need_to_change(id): + for id_and_description in input_ids_and_descriptions: + id = id_and_description[0] + description = "" # Treat description as absent as it will be removed by write_fasta_with_new_ids + if not do_id_need_to_change((id, description), silent=True): shortened_ids.append(id) continue shortened_id = shorten_id_by_pattern_replacement(patterns_dict, id) - if not do_id_need_to_change(shortened_id): + if not do_id_need_to_change((shortened_id, description), silent=True): shortened_ids.append(shortened_id) continue shortened_id = f"Ctg{generate_hash(id)}" - if not do_id_need_to_change(shortened_id): + if not do_id_need_to_change((shortened_id, description), silent=True): shortened_ids.append(shortened_id) continue @@ -149,24 +154,27 @@ def fail_if_new_ids_not_valid(ids): if __name__ == "__main__": - input_ids = extract_fasta_ids(fasta_file_path) + input_ids_and_descriptions = extract_fasta_ids_and_descriptions(fasta_file_path) + input_ids = [x[0] for x in input_ids_and_descriptions] - if not do_ids_need_to_change(input_ids): - print("IDs have acceptable length and character. No change required.") - - with open(f"{output_files_prefix}.renamed.ids.tsv", "w") as f: - f.write("IDs have acceptable length and character. 
No change required.") - - write_fasta_without_comments(fasta_file_path, output_files_prefix) + # Write versions + with open(f"versions.yml", "w") as f_versions: + f_versions.write('"${task.process}":\\n') + f_versions.write(f" python: {python_version()}\\n") + f_versions.write(f" biopython: {version('biopython')}\\n") + if not do_ids_need_to_change(input_ids_and_descriptions): + print("IDs have acceptable length and character. No change required.") exit(0) - new_ids = shorten_ids(input_ids, extract_common_patterns(input_ids)) + new_ids = shorten_ids( + input_ids_and_descriptions, extract_common_patterns(input_ids) + ) fail_if_new_ids_not_valid(new_ids) - with open(f"{output_files_prefix}.renamed.ids.tsv", "w") as f: + with open(f"{output_files_prefix}.short.ids.tsv", "w") as f: for input_id, new_id in zip(input_ids, new_ids): - f.write(f"{input_id}\t{new_id}\n") + f.write(f"{input_id}\\t{new_id}\\n") write_fasta_with_new_ids( fasta_file_path, zip(input_ids, new_ids), output_files_prefix diff --git a/modules/pfr/custom/shortenfastaids/tests/main.nf.test b/modules/pfr/custom/shortenfastaids/tests/main.nf.test new file mode 100644 index 0000000..dc46bae --- /dev/null +++ b/modules/pfr/custom/shortenfastaids/tests/main.nf.test @@ -0,0 +1,131 @@ +nextflow_process { + + name "Test Process CUSTOM_SHORTENFASTAIDS" + script "../main.nf" + process "CUSTOM_SHORTENFASTAIDS" + + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "custom/shortenfastaids" + + test("homo_sapiens-genome_fasta-no_change") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.short_ids_fasta == [] }, + { assert process.out.short_ids_tsv == [] } + ) + } + + } + + 
test("sarscov2-genome_fasta-pattern_change") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-genome2_fasta-length_change") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome2_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("custom_fasta-comment_change") { + + when { + process { + """ + input[0] = Channel.of('>Chr1 This is a test comment', 'AGCTAGCT') + | collectFile(name: 'sample.fasta', newLine: true) + | map { file -> [ [ id:'test' ], file ] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.short_ids_fasta == [] }, + { assert process.out.short_ids_tsv == [] } + ) + } + + } + +} diff --git a/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap b/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap new file mode 100644 index 0000000..8fed1b9 --- /dev/null +++ b/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap @@ -0,0 +1,170 @@ +{ + "custom_fasta-comment_change": { + "content": [ + { + "0": [ + [ + { + "id": 
"test" + }, + "test.short.ids.fasta:md5,c861b9d46a4d9bdba66953cff572fc5d" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.short.ids.tsv:md5,8762f2bffbdff75c2812bad72ba52bba" + ] + ], + "2": [ + "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" + ], + "short_ids_fasta": [ + [ + { + "id": "test" + }, + "test.short.ids.fasta:md5,c861b9d46a4d9bdba66953cff572fc5d" + ] + ], + "short_ids_tsv": [ + [ + { + "id": "test" + }, + "test.short.ids.tsv:md5,8762f2bffbdff75c2812bad72ba52bba" + ] + ], + "versions": [ + "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" + ] + } + ], + "timestamp": "2023-12-07T13:33:05.523745" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" + ] + ], + "timestamp": "2023-12-07T13:30:30.361527" + }, + "homo_sapiens-genome_fasta-no_change": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" + ], + "short_ids_fasta": [ + + ], + "short_ids_tsv": [ + + ], + "versions": [ + "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" + ] + } + ], + "timestamp": "2023-12-07T13:32:54.220188" + }, + "homo_sapiens-genome2_fasta-length_change": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.short.ids.fasta:md5,1382acd98d4cd233a8062ef01b2aaa6d" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.short.ids.tsv:md5,99c0f2a529cb595b2d8530024ed2880e" + ] + ], + "2": [ + "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" + ], + "short_ids_fasta": [ + [ + { + "id": "test" + }, + "test.short.ids.fasta:md5,1382acd98d4cd233a8062ef01b2aaa6d" + ] + ], + "short_ids_tsv": [ + [ + { + "id": "test" + }, + "test.short.ids.tsv:md5,99c0f2a529cb595b2d8530024ed2880e" + ] + ], + "versions": [ + "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" + ] + } + ], + "timestamp": "2023-12-07T13:33:01.924483" + }, + "sarscov2-genome_fasta-pattern_change": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + 
"test.short.ids.fasta:md5,14d6f587b6d28889c5c0f985e78d602f" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.short.ids.tsv:md5,d7a2af88e8549586e5616bff6a88bd71" + ] + ], + "2": [ + "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" + ], + "short_ids_fasta": [ + [ + { + "id": "test" + }, + "test.short.ids.fasta:md5,14d6f587b6d28889c5c0f985e78d602f" + ] + ], + "short_ids_tsv": [ + [ + { + "id": "test" + }, + "test.short.ids.tsv:md5,d7a2af88e8549586e5616bff6a88bd71" + ] + ], + "versions": [ + "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" + ] + } + ], + "timestamp": "2023-12-07T13:32:58.12885" + } +} \ No newline at end of file diff --git a/modules/pfr/custom/shortenfastaids/tests/tags.yml b/modules/pfr/custom/shortenfastaids/tests/tags.yml new file mode 100644 index 0000000..4715b64 --- /dev/null +++ b/modules/pfr/custom/shortenfastaids/tests/tags.yml @@ -0,0 +1,2 @@ +custom/shortenfastaids: + - "modules/pfr/custom/shortenfastaids/**" diff --git a/modules/pfr/edta/edta/environment.yml b/modules/pfr/edta/edta/environment.yml new file mode 100644 index 0000000..63160e8 --- /dev/null +++ b/modules/pfr/edta/edta/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "edta_edta" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::edta=2.1.0" diff --git a/modules/pfr/edta/edta/main.nf b/modules/pfr/edta/edta/main.nf new file mode 100644 index 0000000..a81c528 --- /dev/null +++ b/modules/pfr/edta/edta/main.nf @@ -0,0 +1,93 @@ +process EDTA_EDTA { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/edta:2.1.0--hdfd78af_1': + 'biocontainers/edta:2.1.0--hdfd78af_1' }" + + input: + tuple val(meta), path(fasta) + path cds + path curatedlib + path rmout + path exclude + + output: + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*.EDTA.TElib.fa') , emit: te_lib_fasta + tuple val(meta), path('*.EDTA.pass.list') , emit: pass_list , optional: true + tuple val(meta), path('*.EDTA.out') , emit: out_file , optional: true + tuple val(meta), path('*.EDTA.TEanno.gff3') , emit: te_anno_gff3 , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def mod_file_name = "${fasta}.mod" + def cds_file = cds ? "--cds $cds" : '' + def curatedlib_file = curatedlib ? "--curatedlib $curatedlib": '' + def rmout_file = rmout ? "--rmout $rmout" : '' + def exclude_file = exclude ? "--exclude $exclude" : '' + """ + EDTA.pl \\ + --genome $fasta \\ + --threads $task.cpus \\ + $cds_file \\ + $curatedlib_file \\ + $rmout_file \\ + $exclude_file \\ + $args \\ + &> >(tee "${prefix}.log" 2>&1) + + mv \\ + "${mod_file_name}.EDTA.TElib.fa" \\ + "${prefix}.EDTA.TElib.fa" + + [ -f "${mod_file_name}.EDTA.raw/LTR/${mod_file_name}.pass.list" ] \\ + && mv \\ + "${mod_file_name}.EDTA.raw/LTR/${mod_file_name}.pass.list" \\ + "${prefix}.EDTA.pass.list" \\ + || echo "EDTA did not produce a pass.list file" + + [ -f "${mod_file_name}.EDTA.anno/${mod_file_name}.out" ] \\ + && mv \\ + "${mod_file_name}.EDTA.anno/${mod_file_name}.out" \\ + "${prefix}.EDTA.out" \\ + || echo "EDTA did not produce an out file" + + [ -f "${mod_file_name}.EDTA.TEanno.gff3" ] \\ + && mv \\ + "${mod_file_name}.EDTA.TEanno.gff3" \\ + "${prefix}.EDTA.TEanno.gff3" \\ + || echo "EDTA did not produce a TEanno gff3 file" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print 
\$7}') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def touch_pass_list = args.contains("--anno 1") ? "touch ${prefix}.EDTA.pass.list" : '' + def touch_out_file = args.contains("--anno 1") ? "touch ${prefix}.EDTA.out" : '' + def touch_te_anno = args.contains("--anno 1") ? "touch ${prefix}.EDTA.TEanno.gff3": '' + """ + touch "${prefix}.log" + touch "${prefix}.EDTA.TElib.fa" + $touch_pass_list + $touch_out_file + $touch_te_anno + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}') + END_VERSIONS + """ +} diff --git a/modules/pfr/edta/edta/meta.yml b/modules/pfr/edta/edta/meta.yml new file mode 100644 index 0000000..52503b8 --- /dev/null +++ b/modules/pfr/edta/edta/meta.yml @@ -0,0 +1,82 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "edta_edta" +description: Extensive de-novo TE Annotator (EDTA) +keywords: + - genome + - repeat + - annotation + - transposable-elements +tools: + - "edta": + description: Extensive de-novo TE Annotator (EDTA) + homepage: "https://github.com/oushujun/EDTA" + documentation: "https://github.com/oushujun/EDTA" + tool_dev_url: "https://github.com/oushujun/EDTA" + doi: "10.1186/s13059-019-1905-y" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test' ]` + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fsa,fa,fasta}" + - cds: + type: file + description: | + A FASTA file containing the coding sequence (no introns, UTRs, nor TEs) + of this genome or its close relative + pattern: "*.{fsa,fa,fasta}" + - curatedlib: + type: file + description: | + A curated library to keep consistent naming and classification for known TEs + pattern: "*.liban" + - rmout: + type: file + description: | + Homology-based TE annotation instead of using the EDTA library for masking in + RepeatMasker .out format + pattern: "*.out" + - exclude: + type: file + description: Exclude regions (bed format) from TE masking in the MAKER.masked output + pattern: "*.bed" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - log: + type: file + description: Log emitted by EDTA + pattern: "*.log" + - te_lib_fasta: + type: file + description: A non-redundant TE library in fasta format + pattern: "*.EDTA.TElib.fa" + - pass_list: + type: file + description: A summary table of intact LTR-RTs with coordinate and structural information + pattern: "*.EDTA.pass.list" + - out_file: + type: file + description: RepeatMasker annotation of all LTR sequences in the genome + pattern: "*.EDTA.out" + - te_anno_gff3: + type: file + description: A gff3 file containing both structurally intact and fragmented TE annotations + pattern: "*.EDTA.TEanno.gff3" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/edta/edta/tests/main.nf.test b/modules/pfr/edta/edta/tests/main.nf.test new file mode 100644 index 0000000..3aed0a2 --- /dev/null +++ b/modules/pfr/edta/edta/tests/main.nf.test @@ -0,0 +1,77 @@ +nextflow_process { + + name "Test Process EDTA_EDTA" + script "../main.nf" + process "EDTA_EDTA" + config "./nextflow.config" + + tag "modules" + tag 
"modules_nfcore" + tag "edta" + tag "edta/edta" + + test("homo_sapiens-genome_fasta") { + + when { + process { + """ + input[0] = Channel.of(file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)) + | map { f -> + ( + ['>Chr21'] + + f.readLines().subList(66666.toInteger(), 116666.toInteger()) // 4 MB to 7 MB; 60 bases per line + ).join('\\n') + } + | collectFile(name: 'genome_3_to_10_mb.fasta') + | map { f -> [ [ id: 'test'], f ] } + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.log != null }, + { assert process.out.te_lib_fasta != null }, + { assert process.out.pass_list != null }, + { assert process.out.out_file != null }, + { assert process.out.te_anno_gff3 != null } + ) + } + + } + +} diff --git a/modules/pfr/edta/edta/tests/nextflow.config b/modules/pfr/edta/edta/tests/nextflow.config new file mode 100644 index 0000000..e58e10e --- /dev/null +++ b/modules/pfr/edta/edta/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--anno 1' +} diff --git a/modules/pfr/edta/edta/tests/tags.yml b/modules/pfr/edta/edta/tests/tags.yml new file mode 100644 index 0000000..180ae6d --- /dev/null +++ b/modules/pfr/edta/edta/tests/tags.yml @@ -0,0 +1,2 @@ +edta/edta: + - "modules/pfr/edta/edta/**" diff --git a/modules/pfr/lai/environment.yml b/modules/pfr/lai/environment.yml new file mode 100644 index 0000000..94fadbd --- /dev/null +++ 
b/modules/pfr/lai/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "lai" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::LTR_retriever=2.9.0" diff --git a/modules/pfr/lai/main.nf b/modules/pfr/lai/main.nf new file mode 100644 index 0000000..d4fced9 --- /dev/null +++ b/modules/pfr/lai/main.nf @@ -0,0 +1,69 @@ +process LAI { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ltr_retriever:2.9.0--hdfd78af_2': + 'biocontainers/ltr_retriever:2.9.0--hdfd78af_2' }" + + input: + tuple val(meta), path(fasta) + path pass_list + path annotation_out + path monoploid_seqs + + output: + tuple val(meta), path("*.LAI.log") , emit: log + tuple val(meta), path("*.LAI.out") , emit: lai_out , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def monoploid_param = monoploid_seqs ? "-mono $monoploid_seqs" : '' + def lai_output_name = monoploid_seqs ? 
"${annotation_out}.${monoploid_seqs}.out.LAI" : "${annotation_out}.LAI" + """ + # Remove comments from genome fasta, + # otherwise LAI triggers its sequence name change logic + + sed \\ + '/^>/ s/\\s.*\$//' \\ + $fasta \\ + > for_lai_no_comments.fsa + + LAI \\ + -genome for_lai_no_comments.fsa \\ + -intact $pass_list \\ + -all $annotation_out \\ + -t $task.cpus \\ + $monoploid_param \\ + $args \\ + > "${prefix}.LAI.log" + + mv \\ + $lai_output_name \\ + "${prefix}.LAI.out" \\ + || echo "LAI did not produce the output file" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lai: \$(cat /usr/local/share/LTR_retriever/LAI | grep "my \\\$version" | sed 's/my \$version="//; s/";//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.LAI.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lai: \$(cat /usr/local/share/LTR_retriever/LAI | grep "my \\\$version" | sed 's/my \$version="//; s/";//') + END_VERSIONS + """ +} diff --git a/modules/pfr/lai/meta.yml b/modules/pfr/lai/meta.yml new file mode 100644 index 0000000..6fd7aef --- /dev/null +++ b/modules/pfr/lai/meta.yml @@ -0,0 +1,68 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "lai" +description: Estimates the mean LTR sequence identity in the genome +keywords: + - genomics + - annotation + - repeat + - long terminal retrotransposon + - retrotransposon + - stats + - qc +tools: + - "lai": + description: Assessing genome assembly quality using the LTR Assembly Index (LAI) + homepage: "https://github.com/oushujun/LTR_retriever" + documentation: "https://github.com/oushujun/LTR_retriever" + tool_dev_url: "https://github.com/oushujun/LTR_retriever" + doi: "10.1093/nar/gky730" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - fasta: + type: file + description: The genome file that is used to generate everything + pattern: "*.{fsa,fa,fasta}" + - pass_list: + type: file + description: A list of intact LTR-RTs generated by LTR_retriever + pattern: "*.pass.list" + - annotation_out: + type: file + description: RepeatMasker annotation of all LTR sequences in the genome + pattern: "*.out" + - monoploid_seqs: + type: file + description: | + This parameter is mainly for polyploid genomes. User provides a list of + sequence names that represent a monoploid (1x). LAI will be calculated only + on these sequences if provided. + pattern: "*.txt" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - log: + type: file + description: Log from LAI + pattern: "*.LAI.log" + - lai_out: + type: file + description: | + Output file from LAI if LAI is able to estimate the index from the inputs + pattern: "*.LAI.out" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/lai/tests/main.nf.test b/modules/pfr/lai/tests/main.nf.test new file mode 100644 index 0000000..353043c --- /dev/null +++ b/modules/pfr/lai/tests/main.nf.test @@ -0,0 +1,120 @@ +nextflow_process { + + name "Test Process LAI" + script "../main.nf" + process "LAI" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "lai" + tag "gt/suffixerator" + tag "nf-core/gunzip" + tag "gt/ltrharvest" + tag "ltrretriever" + + test("homo_sapiens-genome_21_fasta-success") { + + setup { + run("GUNZIP") { + script "../../../nf-core/gunzip" + + process { + """ + input[0] = [ + [ id:'test' ], + file('/Users/hrauxr/Projects/nxf-modules/tests/data/chr1.fa.gz', checkIfExists: true) + ] + """ + } + } + + run("GT_SUFFIXERATOR") { + script "../../../pfr/gt/suffixerator" + + process { + """ + input[0] = GUNZIP.out.gunzip
+ """ + } + } + + run("GT_LTRHARVEST") { + script "../../../pfr/gt/ltrharvest" + + process { + """ + input[0] = GT_SUFFIXERATOR.out.index + """ + } + } + + run("LTRRETRIEVER") { + script "../../../pfr/ltrretriever" + + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = GT_LTRHARVEST.out.tabout.map { meta, tabout -> tabout } + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + } + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = LTRRETRIEVER.out.pass_list.map { meta, pass_list -> pass_list } + input[2] = LTRRETRIEVER.out.annotation_out.map { meta, annotation_out -> annotation_out } + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Dependency checking: Passed!") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Calculate LAI:") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Total LTR sequence content (0%) is too low for accurate LAI calculation") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Sorry, LAI is not applicable on the current genome assembly.") }, + { assert process.out.lai_out == [] }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/pfr/lai/tests/main.nf.test.snap b/modules/pfr/lai/tests/main.nf.test.snap new file mode 100644 index 0000000..751ddb6 --- /dev/null +++ b/modules/pfr/lai/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + 
"versions.yml:md5,2ac93e1e6324236af6f9a794bbac2099" + ] + ], + "timestamp": "2023-12-05T12:15:32.969684" + } +} \ No newline at end of file diff --git a/modules/pfr/lai/tests/nextflow.config b/modules/pfr/lai/tests/nextflow.config new file mode 100644 index 0000000..516a3e2 --- /dev/null +++ b/modules/pfr/lai/tests/nextflow.config @@ -0,0 +1,10 @@ +process { + + withName: GT_SUFFIXERATOR { + ext.args = '-tis -suf -lcp -des -ssp -sds -dna' + } + + withName: GT_LTRHARVEST { + ext.args = '-minlenltr 100 -maxlenltr 7000 -mintsd 4 -maxtsd 6 -motif TGCA -motifmis 1 -similar 85 -vic 10 -seed 20 -seqids yes' + } +} diff --git a/modules/pfr/lai/tests/tags.yml b/modules/pfr/lai/tests/tags.yml new file mode 100644 index 0000000..252295d --- /dev/null +++ b/modules/pfr/lai/tests/tags.yml @@ -0,0 +1,2 @@ +lai: + - "modules/pfr/lai/**" diff --git a/modules/pfr/liftoff/environment.yml b/modules/pfr/liftoff/environment.yml new file mode 100644 index 0000000..8761c9b --- /dev/null +++ b/modules/pfr/liftoff/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "liftoff" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::liftoff=1.6.3" diff --git a/modules/pfr/liftoff/main.nf b/modules/pfr/liftoff/main.nf new file mode 100644 index 0000000..317eca1 --- /dev/null +++ b/modules/pfr/liftoff/main.nf @@ -0,0 +1,62 @@ +process LIFTOFF { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/liftoff:1.6.3--pyhdfd78af_0': + 'biocontainers/liftoff:1.6.3--pyhdfd78af_0' }" + + input: + tuple val(meta), path(target_fa) + path ref_fa, name: 'ref_assembly.fa' + path ref_annotation + + output: + tuple val(meta), path("${prefix}.gff3") , emit: gff3 + tuple val(meta), path("*.polished.gff3") , emit: polished_gff3, optional: true + tuple val(meta), path("*.unmapped.txt") , emit: unmapped_txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + liftoff \\ + -g $ref_annotation \\ + -p $task.cpus \\ + -o "${prefix}.gff3" \\ + -u "${prefix}.unmapped.txt" \\ + $args \\ + $target_fa \\ + ref_assembly.fa + + mv \\ + "${prefix}.gff3_polished" \\ + "${prefix}.polished.gff3" \\ + || echo "-polish is absent" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + liftoff: \$(liftoff --version 2> /dev/null) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def touch_polished = args.contains('-polish') ? 
"touch ${prefix}.polished.gff3" : '' + """ + touch "${prefix}.gff3" + touch "${prefix}.unmapped.txt" + $touch_polished + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + liftoff: \$(liftoff --version 2> /dev/null) + END_VERSIONS + """ +} diff --git a/modules/pfr/liftoff/meta.yml b/modules/pfr/liftoff/meta.yml new file mode 100644 index 0000000..46b3c58 --- /dev/null +++ b/modules/pfr/liftoff/meta.yml @@ -0,0 +1,66 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "liftoff" +description: | + Uses Liftoff to accurately map annotations in GFF or GTF between assemblies of the same, + or closely-related species +keywords: + - genome + - annotation + - gff3 + - gtf + - liftover +tools: + - "liftoff": + description: | + Liftoff is a tool that accurately maps annotations in GFF or GTF between assemblies of the same, + or closely-related species + homepage: "https://github.com/agshumate/Liftoff" + documentation: "https://github.com/agshumate/Liftoff" + tool_dev_url: "https://github.com/agshumate/Liftoff" + doi: "10.1093/bioinformatics/bty191" + licence: ["GPL v3 License"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - target_fa: + type: file + description: Target assembly in fasta format + pattern: "*.{fsa,fa,fasta}" + - ref_fa: + type: file + description: Reference assembly in fasta format + pattern: "*.{fsa,fa,fasta}" + - ref_annotation: + type: file + description: Reference assembly annotations in gtf or gff3 format + pattern: "*.{gtf,gff3}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test' ]` + - gff3: + type: file + description: Lifted annotations for the target assembly in gff3 format + pattern: "*.gff3" + - polished_gff3: + type: file + description: Polished lifted annotations for the target assembly in gff3 format + pattern: "*.polished.gff3" + - unmapped_txt: + type: file + description: List of unmapped reference annotations + pattern: "*.unmapped.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/liftoff/tests/main.nf.test b/modules/pfr/liftoff/tests/main.nf.test new file mode 100644 index 0000000..272c882 --- /dev/null +++ b/modules/pfr/liftoff/tests/main.nf.test @@ -0,0 +1,119 @@ +nextflow_process { + + name "Test Process LIFTOFF" + script "../main.nf" + process "LIFTOFF" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "nf-core/gunzip" + tag "liftoff" + + test("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf") { + + setup { + run("GUNZIP") { + script "../../../nf-core/gunzip" + + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_1_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + input[1] = GUNZIP.out.gunzip.map { meta, file -> file } + input[2] = [ + file(params.test_data['homo_sapiens']['genome']['genome_1_gtf'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.unmapped_txt).match("unmapped_txt") }, + { assert file(process.out.gff3[0][1]).text.contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") }, + { assert file(process.out.polished_gff3[0][1]).text.contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") }, + { + assert snapshot( + ( + 
[process.out.gff3[0][0].toString()] + // meta + process.out.gff3.collect { file(it[1]).getName() } + + process.out.polished_gff3.collect { file(it[1]).getName() } + + process.out.unmapped_txt.collect { file(it[1]).getName() } + ).sort() + ).match("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-stub") { + options '-stub' + + setup { + run("GUNZIP") { + script "../../../nf-core/gunzip" + + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_1_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + input[1] = GUNZIP.out.gunzip.map { meta, file -> file } + input[2] = [ + file(params.test_data['homo_sapiens']['genome']['genome_1_gtf'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.gff3[0][0].toString()] + // meta + process.out.gff3.collect { file(it[1]).getName() } + + process.out.polished_gff3.collect { file(it[1]).getName() } + + process.out.unmapped_txt.collect { file(it[1]).getName() } + ).sort() + ).match("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/pfr/liftoff/tests/main.nf.test.snap b/modules/pfr/liftoff/tests/main.nf.test.snap new file mode 100644 index 0000000..baa4d70 --- /dev/null +++ b/modules/pfr/liftoff/tests/main.nf.test.snap @@ -0,0 +1,34 @@ +{ + "unmapped_txt": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.unmapped.txt:md5,7391d10df6e15db356b084c9af5259e4" + ] + ] + ], + "timestamp": "2023-12-01T13:57:40.748507" + }, + "versions": { + "content": 
[ + [ + "versions.yml:md5,205d9c609e7fe27d8199550d842bdce8" + ] + ], + "timestamp": "2023-12-01T13:57:40.752414" + }, + "homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-for_stub_match": { + "content": [ + [ + "test.gff3", + "test.polished.gff3", + "test.unmapped.txt", + "{id=test}" + ] + ], + "timestamp": "2023-12-21T15:20:04.816416" + } +} \ No newline at end of file diff --git a/modules/pfr/liftoff/tests/nextflow.config b/modules/pfr/liftoff/tests/nextflow.config new file mode 100644 index 0000000..06b9d76 --- /dev/null +++ b/modules/pfr/liftoff/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: LIFTOFF { + ext.args = '-exclude_partial -copies -polish -a 0.1 -s 0.1' + } +} diff --git a/modules/pfr/liftoff/tests/tags.yml b/modules/pfr/liftoff/tests/tags.yml new file mode 100644 index 0000000..4ae1fb0 --- /dev/null +++ b/modules/pfr/liftoff/tests/tags.yml @@ -0,0 +1,2 @@ +liftoff: + - "modules/pfr/liftoff/**" diff --git a/modules/pfr/repeatmodeler/builddatabase/environment.yml b/modules/pfr/repeatmodeler/builddatabase/environment.yml new file mode 100644 index 0000000..ecc282e --- /dev/null +++ b/modules/pfr/repeatmodeler/builddatabase/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "repeatmodeler_builddatabase" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::repeatmodeler=2.0.5" diff --git a/modules/pfr/repeatmodeler/builddatabase/main.nf b/modules/pfr/repeatmodeler/builddatabase/main.nf new file mode 100644 index 0000000..486e25d --- /dev/null +++ b/modules/pfr/repeatmodeler/builddatabase/main.nf @@ -0,0 +1,50 @@ +process REPEATMODELER_BUILDDATABASE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/repeatmodeler:2.0.5--pl5321hdfd78af_0': + 'biocontainers/repeatmodeler:2.0.5--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("${prefix}.*") , emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + """ + BuildDatabase \\ + -name $prefix \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.nhr + touch ${prefix}.nin + touch ${prefix}.njs + touch ${prefix}.nnd + touch ${prefix}.nni + touch ${prefix}.nog + touch ${prefix}.nsq + touch ${prefix}.translation + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||') + END_VERSIONS + """ +} diff --git a/modules/pfr/repeatmodeler/builddatabase/meta.yml b/modules/pfr/repeatmodeler/builddatabase/meta.yml new file mode 100644 index 0000000..d3aa931 --- /dev/null +++ b/modules/pfr/repeatmodeler/builddatabase/meta.yml @@ -0,0 +1,44 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "repeatmodeler_builddatabase" +description: Create a database for RepeatModeler +keywords: + - genomics + - fasta + - repeat +tools: + - "repeatmodeler": + description: "RepeatModeler is a de-novo repeat family identification and modeling package." 
+ homepage: "https://github.com/Dfam-consortium/RepeatModeler" + documentation: "https://github.com/Dfam-consortium/RepeatModeler" + tool_dev_url: "https://github.com/Dfam-consortium/RepeatModeler" + licence: ["Open Software License v2.1"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fasta: + type: file + description: Fasta file + pattern: "*.{fasta,fsa,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - db: + type: file + description: Database files for repeatmodeler + pattern: "`${prefix}.*`" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test new file mode 100644 index 0000000..616f88c --- /dev/null +++ b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process REPEATMODELER_BUILDDATABASE" + script "../main.nf" + process "REPEATMODELER_BUILDDATABASE" + + tag "modules" + tag "modules_nfcore" + tag "repeatmodeler" + tag "repeatmodeler/builddatabase" + + test("sarscov2-genome_fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert snapshot(process.out.db[0][1].collect { file(it).name }.sort().toString()).match("for-stub-match") } + ) + } + + } + + test("sarscov2-genome_fasta-stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + 
} + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert snapshot(process.out.db[0][1].collect { file(it).name }.sort().toString()).match("for-stub-match") } + ) + } + + } + +} diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap new file mode 100644 index 0000000..cda327e --- /dev/null +++ b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,7944637266bc3e2726899eaad5e46c87" + ] + ], + "timestamp": "2024-01-09T15:14:48.807063" + }, + "for-stub-match": { + "content": [ + "[test.nhr, test.nin, test.njs, test.nnd, test.nni, test.nog, test.nsq, test.translation]" + ], + "timestamp": "2024-01-09T15:14:48.81702" + } +} \ No newline at end of file diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/tags.yml b/modules/pfr/repeatmodeler/builddatabase/tests/tags.yml new file mode 100644 index 0000000..426540d --- /dev/null +++ b/modules/pfr/repeatmodeler/builddatabase/tests/tags.yml @@ -0,0 +1,2 @@ +repeatmodeler/builddatabase: + - "modules/pfr/repeatmodeler/builddatabase/**" diff --git a/modules/pfr/repeatmodeler/repeatmodeler/environment.yml b/modules/pfr/repeatmodeler/repeatmodeler/environment.yml new file mode 100644 index 0000000..2422071 --- /dev/null +++ b/modules/pfr/repeatmodeler/repeatmodeler/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "repeatmodeler_repeatmodeler" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::repeatmodeler=2.0.5" diff --git a/modules/pfr/repeatmodeler/repeatmodeler/main.nf b/modules/pfr/repeatmodeler/repeatmodeler/main.nf new file mode 100644 index 0000000..34df322 --- /dev/null +++ b/modules/pfr/repeatmodeler/repeatmodeler/main.nf @@ 
-0,0 +1,54 @@ +process REPEATMODELER_REPEATMODELER { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/repeatmodeler:2.0.5--pl5321hdfd78af_0': + 'biocontainers/repeatmodeler:2.0.5--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(db) + + output: + tuple val(meta), path("*.fa") , emit: fasta + tuple val(meta), path("*.stk") , emit: stk + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def db_name = file(db[0]).getBaseName() + """ + RepeatModeler \\ + -database $db_name \\ + $args \\ + -threads $task.cpus + + mv ${db_name}-families.fa ${prefix}.fa + mv ${db_name}-families.stk ${prefix}.stk + mv ${db_name}-rmod.log ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fa + touch ${prefix}.stk + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||') + END_VERSIONS + """ +} diff --git a/modules/pfr/repeatmodeler/repeatmodeler/meta.yml b/modules/pfr/repeatmodeler/repeatmodeler/meta.yml new file mode 100644 index 0000000..29bb795 --- /dev/null +++ b/modules/pfr/repeatmodeler/repeatmodeler/meta.yml @@ -0,0 +1,52 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "repeatmodeler_repeatmodeler" +description: Performs de novo transposable element (TE) 
family identification with RepeatModeler +keywords: + - genomics + - fasta + - repeat + - transposable element +tools: + - "repeatmodeler": + description: "RepeatModeler is a de-novo repeat family identification and modeling package." + homepage: "https://github.com/Dfam-consortium/RepeatModeler" + documentation: "https://github.com/Dfam-consortium/RepeatModeler" + tool_dev_url: "https://github.com/Dfam-consortium/RepeatModeler" + licence: ["Open Software License v2.1"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - db: + type: file + description: RepeatModeler database files generated with REPEATMODELER_BUILDDATABASE + pattern: "*" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fasta: + type: file + description: Consensus repeat sequences + pattern: "*.fa" + - stk: + type: file + description: Seed alignments + pattern: "*.stk" + - log: + type: file + description: A summarized log of the run + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test new file mode 100644 index 0000000..78b7957 --- /dev/null +++ b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process REPEATMODELER_REPEATMODELER" + script "../main.nf" + process "REPEATMODELER_REPEATMODELER" + + tag "modules" + tag "modules_nfcore" + tag "repeatmodeler" + tag "repeatmodeler/repeatmodeler" + tag "repeatmodeler/builddatabase" + + test("homo_sapiens-genome_fasta") { + + setup { + run("REPEATMODELER_BUILDDATABASE") { + script "../../../../pfr/repeatmodeler/builddatabase" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + 
file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = REPEATMODELER_BUILDDATABASE.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fasta).match("fasta") }, + { assert snapshot(process.out.stk).match("stk") }, + { assert file(process.out.log[0][1]).text.contains('1 families discovered.') }, + { assert snapshot(process.out.versions).match("versions") }, + { + assert snapshot( + ( + process.out.fasta.collect { file(it[1]).getName() } + + process.out.stk.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("for-stub-match") + } + ) + } + + } + + test("homo_sapiens-genome_fasta-stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { + assert snapshot( + ( + process.out.fasta.collect { file(it[1]).getName() } + + process.out.stk.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + ).sort() + ).match("for-stub-match") + } + ) + } + + } + +} diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap new file mode 100644 index 0000000..051dd60 --- /dev/null +++ b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap @@ -0,0 +1,46 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,1bb6846ecf1304c262eaef4d3de60cf9" + ] + ], + "timestamp": "2024-01-09T15:06:55.753492" + }, + "stk": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.stk:md5,acd01ad35763c11315e2297a4f051d57" + ] + ] + ], + "timestamp": "2024-01-09T15:06:55.740963" + }, + "for-stub-match": { + 
"content": [ + [ + "test.fa", + "test.log", + "test.stk" + ] + ], + "timestamp": "2024-01-09T15:06:55.759971" + }, + "fasta": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.fa:md5,e25326771341204e1f8054d9529411e5" + ] + ] + ], + "timestamp": "2024-01-09T15:06:55.737658" + } +} \ No newline at end of file diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml b/modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml new file mode 100644 index 0000000..648cc93 --- /dev/null +++ b/modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml @@ -0,0 +1,2 @@ +repeatmodeler/repeatmodeler: + - "modules/pfr/repeatmodeler/repeatmodeler/**" diff --git a/nextflow.config b/nextflow.config index a624175..409da80 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,80 +1,87 @@ includeConfig './conf/base.config' params { - // FASTA files (fasta, fasta.gz) for the assemblies to annotate - // - // Pattern: [["tag", "file path"]] - // Permissible tags: tag, tag_1, tag_tag2_3, tag_tag2_tag3; - // Any name with alphanumeric characters including "_". - // "." is not allowed in the tag name - // Unique, short tags are recommended. - // Otherwise, some of the plots in the report may not display correctly. - // Examples: - // target_assemblies = [["tag1", "./a/relative/path/to/the/fasta/file.fasta"], - // ["tag2", "./a/relative/path/to/the/fasta/file2.fasta"], - // ["tag3", "https://ftp.ncbi.nlm.nih.gov/genomes/test_genome.fna"], ...] - // target_assemblies = [["tair10", "/an/absolute/path/to/the/fasta/file.fasta"]] target_assemblies = [ - ["red5_v2p1", "/workspace/hrauxr/pan-gene/.test/red5_v2p1_chr1.fasta"] + ["red5_v2p1", "/workspace/pangene/test_data/red5_v2p1_chr1.fasta"], + ["donghong", "/workspace/pangene/test_data/donghong.chr1.fsa.gz"] ] + // Pattern: [ [tag, fasta(.gz) ] ] + // Permissible tags: tag, tag_1, tag_tag2_3, tag_tag2_tag3; + // Any name with alphanumeric characters including "_". + // "." 
is not allowed in the tag name - // TE libs (fasta, fasta.gz) for target_assemblies - // - // Optional Set to [] if libraries are not available, te_libraries = [] + te_libraries = [ + ["donghong", "/workspace/pangene/test_data/donghong.TElib.fa.gz"] + ] + // Pattern: [ [tag, fasta(.gz) ] ] + // Optional Set to null if libraries are not available. // // Each TE library should have an associated (by tag) assembly in target_assemblies. // Not all target_assemblies need to have an associated (by tag) TE library. // When the TE lib is not available for a traget assembly, EDTA is used to create one. - - te_libraries = [] - - edta { - is_sensitive = false - save_outputs = true - } - repeatmasker { - save_outputs = true - } - - // Optional: Set to null if not available - samplesheet = "./.test/samplesheet.csv" - sample_prep { - skip_fastqc = false - skip_fastp = false - min_trimmed_reads = 10000 - extra_fastp_args = "" + repeat_annotator = 'repeatmodeler' + // 'repeatmodeler' or 'edta' - // toggling this parameter results in rerun of FASTP and FASTQC_TRIM - save_trimmed = false + save_annotated_te_lib = true - remove_ribo_rna = true - save_non_ribo_reads = false - ribo_database_manifest = "${projectDir}/assets/rrna-db-defaults.txt" - } + edta_is_sensitive = false - star_align { - max_intron_length = 16000 - extra_star_align_args = "" - save_outputs = false - } + repeatmasker_save_outputs = true + samplesheet = "/workspace/pangene/test_data/samplesheet.csv" // Optional: Set to null if not available - external_protein_seqs = [ + + skip_fastqc = false + skip_fastp = false + min_trimmed_reads = 10000 + extra_fastp_args = "" + + save_trimmed = true + // toggling this parameter results in rerun of FASTP and FASTQC_TRIM + + remove_ribo_rna = false + save_non_ribo_reads = true + ribo_database_manifest = "${projectDir}/assets/rrna-db-defaults.txt" + + star_max_intron_length = 16000 + star_align_extra_args = "" + star_save_outputs = true + save_cat_bam = true + // A single BAM is 
created for each assembly from all the RNAseq samples, if there + // are more than one + + external_protein_fastas = [ "/workspace/ComparativeDataSources/OrthoDB11/Viridiplantae.fa.gz", "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.pep.fasta" ] + // Optional: Set to null if not available + + braker_extra_args = "" + + liftoff_xref_annotations = [ + [ + "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/Russell_V2a.chromosomes.and.unassiged.and.haplotigs.fsa", + "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.gff3" + ], + [ + "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_chr_all.fas", + "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_GFF3_genes_transposons.fixed.gff3" + ] + ] + // Format: [ [ fasta(.gz), gff3(.gz) ] ] + // Optional: Set to null if not available - braker { - extra_braker_args = "" - } + liftoff_coverage = 0.9 + liftoff_identity = 0.9 - outdir = "./results" + outdir = "./results" - max_cpus = 12 - max_memory = 200.GB - max_time = 1.days + max_cpus = 12 + max_memory = 200.GB + max_time = 1.days } +includeConfig './conf/manifest.config' includeConfig './conf/modules.config' -includeConfig './conf/reporting_defaults.config' \ No newline at end of file +includeConfig './conf/reporting_defaults.config' diff --git a/pan_gene_pfr.sh b/pan_gene_pfr.sh deleted file mode 100644 index a1b1ced..0000000 --- a/pan_gene_pfr.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -e - - -#SBATCH --job-name PAN_GENE -#SBATCH --time=1-00:00:00 -#SBATCH --nodes=1 -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=2 -#SBATCH --output pan_gene_pfr.stdout -#SBATCH --error pan_gene_pfr.stderr -#SBATCH --mem=4G - -ml apptainer/1.1 -ml nextflow/22.10.4 - -export TMPDIR="/workspace/$USER/tmp" - 
-nextflow main.nf -profile slurm -resume \ No newline at end of file diff --git a/pangene_local b/pangene_local new file mode 100755 index 0000000..255edb9 --- /dev/null +++ b/pangene_local @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +NO_FORMAT="\033[0m" +C_RED="\033[38;5;9m" +F_BOLD="\033[1m" + +[[ $1 == '-stub' ]] \ + && stub='-stub' \ + || stub='' + +[[ $1 == '-stub' ]] \ + && echo 'Executing with -stub' \ + || echo -e "${C_RED}${F_BOLD}Executing without -stub${NO_FORMAT}" + +nextflow \ + main.nf \ + -profile local,docker \ + -resume \ + $stub \ + -params-file conf/test_params.json diff --git a/pangene_pfr b/pangene_pfr new file mode 100644 index 0000000..608798c --- /dev/null +++ b/pangene_pfr @@ -0,0 +1,22 @@ +#!/bin/bash -e + + +#SBATCH --job-name PANGENE +#SBATCH --time=1-00:00:00 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --output pangene_pfr.stdout +#SBATCH --error pangene_pfr.stderr +#SBATCH --mem=4G + +ml apptainer/1.1 +ml nextflow/23.04.4 + +export TMPDIR="/workspace/$USER/tmp" +export APPTAINER_BINDPATH="$APPTAINER_BINDPATH,$TMPDIR:$TMPDIR,$TMPDIR:/tmp" + +nextflow \ + main.nf \ + -profile pfr,apptainer \ + -resume diff --git a/subworkflows/local/align_rnaseq.nf b/subworkflows/local/align_rnaseq.nf new file mode 100644 index 0000000..05cd2fa --- /dev/null +++ b/subworkflows/local/align_rnaseq.nf @@ -0,0 +1,76 @@ +include { STAR_ALIGN } from '../../modules/nf-core/star/align' +include { SAMTOOLS_CAT } from '../../modules/nf-core/samtools/cat' + +workflow ALIGN_RNASEQ { + take: + reads_target // channel: [ meta, assembly_id ] + trim_reads // channel: [ meta, [ fq ] ] + assembly_index // channel: [ meta2, star_index ] + + main: + ch_versions = Channel.empty() + + // MODULE: STAR_ALIGN + ch_star_inputs = reads_target + | combine(trim_reads, by:0) + | map { meta, assembly, fastq -> + [ + assembly, + [ + id: "${meta.id}.on.${assembly}", + single_end: meta.single_end, + target_assembly: assembly + ], + fastq + ] + } + | combine( + 
assembly_index.map { meta, index -> [ meta.id, index ] }, + by:0 + ) + | map { assembly, meta, fastq, index -> [ meta, fastq, index ] } + + def star_ignore_sjdbgtf = true + def seq_platform = false + def seq_center = false + + STAR_ALIGN( + ch_star_inputs.map { meta, fastq, index -> [ meta, fastq ] }, + ch_star_inputs.map { meta, fastq, index -> [ [ id: meta.target_assembly ], index ] }, + ch_star_inputs.map { meta, fastq, index -> [ [ id: meta.target_assembly ], [] ] }, + star_ignore_sjdbgtf, + seq_platform, + seq_center + ) + + ch_star_bam = STAR_ALIGN.out.bam_sorted + ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) + + // MODULE: SAMTOOLS_CAT + ch_star_bam_branch = ch_star_bam + | map { meta, bam -> + [ + [ id: meta.target_assembly ], + bam instanceof List ? bam.find { it =~ /Aligned/ } : bam + ] + } + | groupTuple + | branch { meta, bamList -> + bams: bamList.size() > 1 + bam: bamList.size() <= 1 + } + + SAMTOOLS_CAT ( ch_star_bam_branch.bams ) + + ch_samtools_bam = SAMTOOLS_CAT.out.bam + | map { meta, bam -> [meta, [bam]] } + | mix( + ch_star_bam_branch.bam + ) + + ch_versions = ch_versions.mix(SAMTOOLS_CAT.out.versions.first()) + + emit: + bam = ch_samtools_bam // channel: [ [ id: target_assembly ], [ bam ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/extract_samples.nf b/subworkflows/local/extract_samples.nf index f3c1a15..947c0b7 100644 --- a/subworkflows/local/extract_samples.nf +++ b/subworkflows/local/extract_samples.nf @@ -1,15 +1,15 @@ -nextflow.enable.dsl=2 - +// Source: // https://github.com/nf-core/rnaseq // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE // +// Check input samplesheet and get read channels +// // Changes: // Added channel permissible_target_assemblies // Changed file name from input_check.nf to extract_samples.nf // Removed strandedness -// -// Check input samplesheet and get read channels -// +// Now emitting an extra channel 'assemblies' which 
indicates the +// assemblies targeted by each read include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' @@ -20,36 +20,52 @@ workflow EXTRACT_SAMPLES { main: SAMPLESHEET_CHECK ( samplesheet, permissible_target_assemblies ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } + .csv + | splitCsv ( header:true, sep:',' ) + | combine ( samplesheet ) + | map { row, sheet -> + create_fastq_channel(row, sheet.getParent()) + } + | set { ch_reads } + + reads = ch_reads.map { meta, fastq -> [[id:meta.id, single_end:meta.single_end], fastq]} + + ch_reads + | flatMap { meta, fastq -> + meta.target_assemblies.collect { assembly -> [[id:meta.id, single_end:meta.single_end], assembly] } + } + | set { assemblies } emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] + reads // channel: [ val(meta), [ reads ] ] + assemblies // channel: [ val(meta), val(assembly) ] + versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] } // Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { +def create_fastq_channel(LinkedHashMap row, sheetPath) { // create meta map def meta = [:] meta.id = row.sample meta.single_end = row.single_end.toBoolean() meta.target_assemblies = row.target_assemblies.split(";").sort() + def fq1 = row.fastq_1.startsWith("/") ? row.fastq_1 : "$sheetPath/${row.fastq_1}" + def fq2 = row.fastq_2.startsWith("/") ? 
row.fastq_2 : "$sheetPath/${row.fastq_2}" + // add path(s) of the fastq file(s) to the meta map def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" + if (!file(fq1).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${fq1}" } if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] + fastq_meta = [ meta, [ file(fq1) ] ] } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" + if (!file(fq2).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${fq2}" } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] + fastq_meta = [ meta, [ file(fq1), file(fq2) ] ] } + return fastq_meta -} \ No newline at end of file +} diff --git a/subworkflows/local/fasta_liftoff.nf b/subworkflows/local/fasta_liftoff.nf new file mode 100644 index 0000000..4c59ba3 --- /dev/null +++ b/subworkflows/local/fasta_liftoff.nf @@ -0,0 +1,98 @@ +include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip' +include { GUNZIP as GUNZIP_GFF } from '../../modules/nf-core/gunzip' +include { GFFREAD } from '../../modules/nf-core/gffread' +include { LIFTOFF } from '../../modules/pfr/liftoff' + +workflow FASTA_LIFTOFF { + take: + target_assemby // Channel: [ meta, fasta ] + xref_fasta // Channel: [ meta2, fasta ] + xref_gff // Channel: [ meta2, gff3 ] + + main: + ch_versions = Channel.empty() + + // MODULE: GUNZIP as GUNZIP_FASTA + ch_xref_fasta_branch = xref_fasta + | branch { meta, file -> + gz: "$file".endsWith(".gz") + rest: !"$file".endsWith(".gz") + } + + GUNZIP_FASTA ( ch_xref_fasta_branch.gz ) + + ch_xref_gunzip_fasta = GUNZIP_FASTA.out.gunzip + | mix( + ch_xref_fasta_branch.rest + ) + + ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions.first()) + + // MODULE: GUNZIP as 
GUNZIP_GFF + ch_xref_gff_branch = xref_gff + | branch { meta, file -> + gz: "$file".endsWith(".gz") + rest: !"$file".endsWith(".gz") + } + + GUNZIP_GFF ( ch_xref_gff_branch.gz ) + + ch_xref_gunzip_gff = GUNZIP_GFF.out.gunzip + | mix( + ch_xref_gff_branch.rest + ) + + ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions.first()) + + // MODULE: GFFREAD + ch_gffread_inputs = ch_xref_gunzip_gff + | map { meta, gff -> + [ gff.getSimpleName(), meta, gff ] + } // For meta insertion later, remove when GFFREAD has meta + + GFFREAD ( ch_gffread_inputs.map { name, meta, gff -> gff } ) + + ch_gffread_gff = GFFREAD.out.gffread_gff + | map { gff -> [ gff.getSimpleName(), gff ] } + | join(ch_gffread_inputs) + | map { fid, gffread_gff, meta, gff -> [ meta, gffread_gff ] } + // meta insertion + + ch_versions = ch_versions.mix(GFFREAD.out.versions.first()) + + // MODULE: LIFTOFF + ch_liftoff_inputs = target_assemby + | combine( + ch_xref_gunzip_fasta + | join( + ch_gffread_gff + ) + ) + | map { meta, target_fa, ref_meta, ref_fa, ref_gff -> + [ + [ + id: "${meta.id}.from.${ref_meta.id}", + target_assemby: meta.id + ], + target_fa, + ref_fa, + ref_gff + ] + } + + LIFTOFF( + ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> [ meta, target_fa ] }, + ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> ref_fa }, + ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> ref_gff } + ) + + ch_liftoff_gff3 = LIFTOFF.out.polished_gff3 + | map { meta, gff -> [ [ id: meta.target_assemby ], gff ] } + | groupTuple + + ch_versions = ch_versions.mix(LIFTOFF.out.versions.first()) + + emit: + gff3 = ch_liftoff_gff3 // [ meta, [ gff3 ] ] + versions = ch_versions // [ versions.yml ] +} diff --git a/subworkflows/local/perform_edta_annotation.nf b/subworkflows/local/perform_edta_annotation.nf deleted file mode 100644 index d362934..0000000 --- a/subworkflows/local/perform_edta_annotation.nf +++ /dev/null @@ -1,48 +0,0 @@ -nextflow.enable.dsl=2 - -include { SHORTEN_EDTA_IDS } from 
'../../modules/local/edta/shorten_edta_ids' -include { EDTA } from '../../modules/local/edta/edta' -include { RESTORE_EDTA_IDS } from '../../modules/local/edta/restore_edta_ids' - -// https://github.com/Plant-Food-Research-Open/assembly_qc -// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE -workflow PERFORM_EDTA_ANNOTATION { - take: - genome_fasta // [meta, /path/to/genome/fasta] - - main: - - SHORTEN_EDTA_IDS(genome_fasta) - .renamed_ids_fasta - | EDTA - - RESTORE_EDTA_IDS( - EDTA.out.te_lib_fasta, - EDTA.out.intact_gff3.map { it[1] }, - EDTA.out.pass_list.map { it[1] }, - EDTA.out.out_file.map { it[1] }, - EDTA.out.te_anno_gff3.map { it[1] }, - SHORTEN_EDTA_IDS.out.renamed_ids_tsv.map { it[1] } - ) - - Channel.empty() - | mix( - SHORTEN_EDTA_IDS.out.versions.first() - ) - | mix( - EDTA.out.versions.first() - ) - | mix( - RESTORE_EDTA_IDS.out.versions.first() - ) - | set { ch_versions } - - emit: - te_lib_fasta = RESTORE_EDTA_IDS.out.te_lib_fasta - intact_gff3 = RESTORE_EDTA_IDS.out.intact_gff3 - pass_list = RESTORE_EDTA_IDS.out.pass_list - out_file = RESTORE_EDTA_IDS.out.out_file - te_anno_gff3 = RESTORE_EDTA_IDS.out.te_anno_gff3 - renamed_ids_tsv = RESTORE_EDTA_IDS.out.renamed_ids_tsv - versions = ch_versions -} \ No newline at end of file diff --git a/subworkflows/local/prepare_assembly.nf b/subworkflows/local/prepare_assembly.nf new file mode 100644 index 0000000..d18f5ce --- /dev/null +++ b/subworkflows/local/prepare_assembly.nf @@ -0,0 +1,129 @@ +include { GUNZIP as GUNZIP_TARGET_ASSEMBLY } from '../../modules/nf-core/gunzip' +include { GUNZIP as GUNZIP_TE_LIBRARY } from '../../modules/nf-core/gunzip' +include { FASTAVALIDATOR } from '../../modules/nf-core/fastavalidator' +include { REPEATMODELER_BUILDDATABASE } from '../../modules/pfr/repeatmodeler/builddatabase' +include { REPEATMODELER_REPEATMODELER } from '../../modules/pfr/repeatmodeler/repeatmodeler' +include { REPEATMASKER } from 
'../../modules/kherronism/repeatmasker' +include { STAR_GENOMEGENERATE } from '../../modules/nf-core/star/genomegenerate' + +include { FASTA_EDTA_LAI } from '../../subworkflows/pfr/fasta_edta_lai' + +workflow PREPARE_ASSEMBLY { + take: + target_assembly // channel: [ meta, fasta ] + te_library // channel: [ meta, fasta ] + repeat_annotator // val(String), 'repeatmodeler' or 'edta' + + main: + ch_versions = Channel.empty() + + // MODULE: GUNZIP_TARGET_ASSEMBLY + target_assembly_branch = target_assembly + | branch { meta, file -> + gz: "$file".endsWith(".gz") + rest: !"$file".endsWith(".gz") + } + + GUNZIP_TARGET_ASSEMBLY ( target_assembly_branch.gz ) + + ch_gunzip_assembly = GUNZIP_TARGET_ASSEMBLY.out.gunzip + | mix( + target_assembly_branch.rest + ) + ch_versions = ch_versions.mix(GUNZIP_TARGET_ASSEMBLY.out.versions.first()) + + + // MODULE: FASTAVALIDATOR + FASTAVALIDATOR ( ch_gunzip_assembly ) + + ch_validated_assembly = ch_gunzip_assembly + | join(FASTAVALIDATOR.out.success_log) + | map { meta, fasta, log -> [ meta, fasta ] } + ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions.first()) + + FASTAVALIDATOR.out.error_log + | map { meta, log -> + System.err.println("WARNING: FASTAVALIDATOR failed for ${meta.id} with error: ${log}. ${meta.id} is excluded from further analysis.") + } + + // MODULE: GUNZIP_TE_LIBRARY + ch_te_library_branch = te_library + | branch { meta, file -> + gz: "$file".endsWith(".gz") + rest: !"$file".endsWith(".gz") + } + + GUNZIP_TE_LIBRARY ( ch_te_library_branch.gz ) + + ch_gunzip_te_library = GUNZIP_TE_LIBRARY.out.gunzip + | mix( + ch_te_library_branch.rest + ) + ch_versions = ch_versions.mix(GUNZIP_TE_LIBRARY.out.versions.first()) + + // SUBWORKFLOW: FASTA_EDTA_LAI + ch_annotator_inputs = ch_validated_assembly + | join( + ch_gunzip_te_library, remainder: true + ) + | filter { meta, assembly, teLib -> + teLib == null + } + | map { meta, assembly, teLib -> [meta, assembly] } + + ch_edta_inputs = repeat_annotator != 'edta' + ? 
Channel.empty() + : ch_annotator_inputs + + FASTA_EDTA_LAI( + ch_edta_inputs, + [], + true // Skip LAI + ) + + ch_versions = ch_versions.mix(FASTA_EDTA_LAI.out.versions.first()) + + // MODULE: REPEATMODELER_BUILDDATABASE + ch_repeatmodeler_inputs = repeat_annotator != 'repeatmodeler' + ? Channel.empty() + : ch_annotator_inputs + + REPEATMODELER_BUILDDATABASE ( ch_repeatmodeler_inputs ) + + ch_versions = ch_versions.mix(REPEATMODELER_BUILDDATABASE.out.versions.first()) + + // MODULE: REPEATMODELER_REPEATMODELER + REPEATMODELER_REPEATMODELER ( REPEATMODELER_BUILDDATABASE.out.db ) + + ch_assembly_and_te_lib = ch_validated_assembly + | join( + repeat_annotator == 'edta' + ? FASTA_EDTA_LAI.out.te_lib_fasta.mix(ch_gunzip_te_library) + : REPEATMODELER_REPEATMODELER.out.fasta.mix(ch_gunzip_te_library) + ) + + ch_versions = ch_versions.mix(REPEATMODELER_REPEATMODELER.out.versions.first()) + + // MODULE: REPEATMASKER + REPEATMASKER( + ch_assembly_and_te_lib.map { meta, assembly, teLib -> [meta, assembly] }, + ch_assembly_and_te_lib.map { meta, assembly, teLib -> teLib }, + ) + + ch_versions = ch_versions.mix(REPEATMASKER.out.versions.first()) + + // MODULE: STAR_GENOMEGENERATE + STAR_GENOMEGENERATE( + ch_validated_assembly, + ch_validated_assembly.map { meta, fasta -> [ [], [] ] } + ) + + ch_assembly_index = STAR_GENOMEGENERATE.out.index + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions.first()) + + emit: + target_assemby = ch_validated_assembly // channel: [ meta, fasta ] + masked_target_assembly = REPEATMASKER.out.fasta_masked // channel: [ meta, fasta ] + target_assemby_index = ch_assembly_index // channel: [ meta, star_index ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/prepare_ext_prots.nf b/subworkflows/local/prepare_ext_prots.nf new file mode 100644 index 0000000..ee65f77 --- /dev/null +++ b/subworkflows/local/prepare_ext_prots.nf @@ -0,0 +1,35 @@ +include { GUNZIP } from '../../modules/nf-core/gunzip' 
+include { CAT_CAT as CAT_PROTEIN_FASTAS } from '../../modules/nf-core/cat/cat' + +workflow PREPARE_EXT_PROTS { + take: + ch_ext_prot_fastas // Channel: [ meta, fasta ] + + main: + ch_versions = Channel.empty() + + // MODULE: GUNZIP + ch_ext_prot_seqs_branch = ch_ext_prot_fastas + | branch { meta, file -> + gz: "$file".endsWith(".gz") + rest: !"$file".endsWith(".gz") + } + + GUNZIP ( ch_ext_prot_seqs_branch.gz ) + + ch_ext_prot_gunzip_fastas = GUNZIP.out.gunzip.mix(ch_ext_prot_seqs_branch.rest) + | map { meta, filePath -> filePath } + | collect + | map { fileList -> [ [ id: "ext_protein_seqs" ], fileList ] } + + ch_versions = ch_versions.mix(GUNZIP.out.versions.first()) + + // MODULE: CAT_CAT as CAT_PROTEIN_FASTAS + CAT_PROTEIN_FASTAS ( ch_ext_prot_gunzip_fastas ) + + ch_versions = ch_versions.mix(CAT_PROTEIN_FASTAS.out.versions) + + emit: + ext_prots_fasta = CAT_PROTEIN_FASTAS.out.file_out // Channel: [ meta, fasta ] + versions = ch_versions // Channel: [ versions.yml ] +} diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf new file mode 100644 index 0000000..9466104 --- /dev/null +++ b/subworkflows/local/preprocess_rnaseq.nf @@ -0,0 +1,101 @@ +include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq' +include { SORTMERNA } from '../../modules/nf-core/sortmerna' +include { EXTRACT_SAMPLES } from '../../subworkflows/local/extract_samples' +include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../../subworkflows/nf-core/fastq_fastqc_umitools_fastp' + +workflow PREPROCESS_RNASEQ { + take: + samplesheet // path: csv + permissible_assemblies // val: assembly_a,assembly_b + skip_fastqc // val: true|false + skip_fastp // val: true|false + save_trimmed // val: true|false + min_trimmed_reads // val: Integer + remove_ribo_rna // val: true|false + sortmerna_fastas // channel: [ [ fasta ] ] + + main: + ch_versions = Channel.empty() + + // SUBWORKFLOW: EXTRACT_SAMPLES + EXTRACT_SAMPLES( + samplesheet, + permissible_assemblies + ) + + 
ch_fastq = EXTRACT_SAMPLES.out.reads + | map { meta, fastq -> + def groupID = meta.id - ~/_T\d+/ // 'def' keeps groupID local to this closure; removes the first _T<digits> match from the id + [ meta + [id: groupID], fastq ] + } + | groupTuple() + | branch { meta, fastq -> + single : fastq.size() == 1 + return [ meta, fastq.flatten() ] + multiple: fastq.size() > 1 + return [ meta, fastq.flatten() ] + } + + ch_reads_target = EXTRACT_SAMPLES.out.assemblies + | map { meta, assembly -> + def groupID = meta.id - ~/_T\d+/ // same id normalisation as in ch_fastq above; declared with 'def' to avoid a shared global + [ meta + [id: groupID], assembly ] + } + | unique + + ch_versions = ch_versions.mix(EXTRACT_SAMPLES.out.versions) + + // MODULES: CAT_FASTQ + CAT_FASTQ ( ch_fastq.multiple ) + + ch_cat_fastq = CAT_FASTQ.out.reads.mix(ch_fastq.single) + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) + + // SUBWORKFLOW: FASTQ_FASTQC_UMITOOLS_FASTP + def with_umi = false + def skip_umi_extract = true + def umi_discard_read = false + + FASTQ_FASTQC_UMITOOLS_FASTP ( + ch_cat_fastq, + skip_fastqc, + with_umi, + skip_umi_extract, + umi_discard_read, + skip_fastp, + [], + save_trimmed, + save_trimmed, + min_trimmed_reads + ) + + ch_trim_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads + + ch_cat_fastq + | join(ch_trim_reads, remainder:true) + | map { meta, reads, trimmed -> + if (!trimmed) { + System.err.println("WARNING: Dropping ${reads.collect { it.getName() }} as read count after trimming is less than $min_trimmed_reads") + } + } + + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions.first()) + + // MODULE: SORTMERNA + SORTMERNA( + remove_ribo_rna ? ch_trim_reads : Channel.empty(), + sortmerna_fastas + ) + + ch_emitted_reads = remove_ribo_rna + ? 
SORTMERNA.out.reads + : ch_trim_reads + ch_versions = ch_versions.mix(SORTMERNA.out.versions.first()) + + + + emit: + trim_reads = ch_emitted_reads // channel: [ meta, [ fq ] ] + reads_target = ch_reads_target // channel: [ meta, assembly_id ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf index 63a6592..2c67b3c 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf @@ -12,7 +12,10 @@ include { FASTP } from '../../../modules/nf-core/fastp/main' // import groovy.json.JsonSlurper -def getFastpReadsAfterFiltering(json_file) { +def getFastpReadsAfterFiltering(json_file, min_trimmed_reads) { + + if (!json_file.text) { return min_trimmed_reads } // Usman Rashid: To allow -stub with FASTP + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') return json['after_filtering']['total_reads'].toLong() } @@ -96,8 +99,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { .out .reads .join(trim_json) - // Change: Bypassing getFastpReadsAfterFiltering when FASTP stub returns empty json - .map { meta, reads, json -> [ meta, reads, json.text ? getFastpReadsAfterFiltering(json) : min_trimmed_reads.toLong()] } + .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json, min_trimmed_reads) ] } .set { ch_num_trimmed_reads } ch_num_trimmed_reads diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml index eafb0dc..220e8db 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml @@ -69,8 +69,10 @@ output: - reads: type: file description: > - Extracted FASTQ files. | - For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + Extracted FASTQ files. 
| For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + + + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. pattern: "*.{fastq.gz}" - fastqc_html: @@ -122,4 +124,5 @@ output: pattern: "versions.yml" authors: - "@robsyme" - - "@gallvp" +maintainers: + - "@robsyme" diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test new file mode 100644 index 0000000..cdd7398 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_workflow { + + name "Test Workflow FASTQ_FASTQC_UMITOOLS_FASTP" + script "../main.nf" + workflow "FASTQ_FASTQC_UMITOOLS_FASTP" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_fastqc_umitools_fastp" + tag "fastq_fastqc_umitools_fastp" + tag "fastqc" + tag "umitools/extract" + tag "fastp" + + + test("sarscov2 paired-end [fastq]") { + + when { + workflow { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = false // skip_fastqc + input[2] = false // with_umi + input[3] = false // skip_umi_extract + input[4] = 1 // umi_discard_read + input[5] = false // skip_trimming + input[6] = [] // adapter_fasta + input[7] = false // save_trimmed_fail + input[8] = false // save_merged + input[9] = 1 // min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.reads).match("reads") }, + { assert snapshot(workflow.out.umi_log).match("umi_log") }, + { assert snapshot(workflow.out.trim_json).match("trim_json") }, + { assert snapshot(workflow.out.trim_reads_fail).match("trim_reads_fail") }, + { assert snapshot(workflow.out.trim_reads_merged).match("trim_reads_merged") }, + { assert 
snapshot(workflow.out.trim_read_count).match("trim_read_count") }, + { assert snapshot(workflow.out.versions).match("versions") }, + + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, + { assert workflow.out.fastqc_trim_html }, + { assert workflow.out.fastqc_trim_zip } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap new file mode 100644 index 0000000..38a65ae --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap @@ -0,0 +1,81 @@ +{ + "trim_reads_merged": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-26T02:28:26.26920982" + }, + "trim_reads_fail": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-26T02:28:26.25861515" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + ], + "timestamp": "2023-11-26T02:28:26.30891403" + }, + "trim_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + ] + ] + ], + "timestamp": "2023-11-26T02:28:26.24768259" + }, + "reads": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] + ] + ] + ], + "timestamp": "2023-12-04T11:30:32.061644815" + }, + "umi_log": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-26T02:28:26.238536" + }, + "trim_read_count": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 198 + ] + ] + ], + "timestamp": "2023-11-26T02:28:26.27984169" + } +} \ No newline at end of file diff --git 
a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml new file mode 100644 index 0000000..84a4b56 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fastq_fastqc_umitools_fastp: + - subworkflows/nf-core/fastq_fastqc_umitools_fastp/** diff --git a/subworkflows/pfr/fasta_edta_lai/main.nf b/subworkflows/pfr/fasta_edta_lai/main.nf new file mode 100644 index 0000000..2e73ca5 --- /dev/null +++ b/subworkflows/pfr/fasta_edta_lai/main.nf @@ -0,0 +1,88 @@ +include { CUSTOM_SHORTENFASTAIDS } from '../../../modules/pfr/custom/shortenfastaids' +include { EDTA_EDTA } from '../../../modules/pfr/edta/edta' +include { LAI } from '../../../modules/pfr/lai' +include { CUSTOM_RESTOREGFFIDS } from '../../../modules/pfr/custom/restoregffids' + +workflow FASTA_EDTA_LAI { + + take: + ch_fasta // channel: [ val(meta), fasta ] + ch_monoploid_seqs // channel: [ val(meta), txt ]; Optional: Set to [] if not needed + skip_lai // val; true|false + + main: + + ch_versions = Channel.empty() + + // MOUDLE: CUSTOM_SHORTENFASTAIDS + CUSTOM_SHORTENFASTAIDS ( ch_fasta ) + + ch_short_ids_fasta = ch_fasta + | join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true) + | map { meta, fasta, short_ids_fasta -> + [ meta, short_ids_fasta ?: fasta ] + } + + ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv + ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first()) + + // MODULE: EDTA_EDTA + EDTA_EDTA ( + ch_short_ids_fasta, + [], + [], + [], + [] + ) + + ch_te_lib_fasta = EDTA_EDTA.out.te_lib_fasta + ch_pass_list = EDTA_EDTA.out.pass_list + ch_out_file = EDTA_EDTA.out.out_file + ch_te_anno_gff3 = EDTA_EDTA.out.te_anno_gff3 + ch_versions = ch_versions.mix(EDTA_EDTA.out.versions.first()) + + // MODULE: LAI + ch_lai_inputs = skip_lai + ? 
Channel.empty() + : ch_short_ids_fasta + | join(ch_pass_list) + | join(ch_out_file) + | join( + ch_monoploid_seqs ?: Channel.empty(), + by:0, + remainder: true + ) + | map { meta, fasta, pass, out, mono -> + [ meta, fasta, pass, out, mono ?: [] ] + } + LAI ( + ch_lai_inputs.map { meta, fasta, pass, out, mono -> [ meta, fasta ] }, + ch_lai_inputs.map { meta, fasta, pass, out, mono -> pass }, + ch_lai_inputs.map { meta, fasta, pass, out, mono -> out }, + ch_lai_inputs.map { meta, fasta, pass, out, mono -> mono } + ) + + ch_lai_log = LAI.out.log + ch_lai_out = LAI.out.lai_out + ch_versions = ch_versions.mix(LAI.out.versions.first()) + + // MODULE: CUSTOM_RESTOREGFFIDS + ch_restorable_gff_tsv = ch_te_anno_gff3.join(ch_short_ids_tsv) + + CUSTOM_RESTOREGFFIDS ( + ch_restorable_gff_tsv.map { meta, gff, tsv -> [ meta, gff ] }, + ch_restorable_gff_tsv.map { meta, gff, tsv -> tsv } + ) + + ch_restored_gff = ch_te_anno_gff3 + | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true) + | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] } + ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first()) + + emit: + te_lib_fasta = ch_te_lib_fasta // channel: [ val(meta), fasta ] + te_anno_gff3 = ch_restored_gff // channel: [ val(meta), gff ] + lai_log = ch_lai_log // channel: [ val(meta), log ] + lai_out = ch_lai_out // channel: [ val(meta), out ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/pfr/fasta_edta_lai/meta.yml b/subworkflows/pfr/fasta_edta_lai/meta.yml new file mode 100644 index 0000000..52483ce --- /dev/null +++ b/subworkflows/pfr/fasta_edta_lai/meta.yml @@ -0,0 +1,69 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fasta_edta_lai" +description: | + Performs extensive de-novo transposable element annotation with EDTA and optionally estimates repeat-space completeness with LAI +keywords: + - genomics + - genome + - 
annotation + - repeat + - transposons + - stats + - qc +components: + - custom/restoregffids + - custom/shortenfastaids + - edta/edta + - lai +input: + - ch_fasta: + type: file + description: | + Channel for the assembly fasta file + Structure: [ val(meta), path(fasta) ] + pattern: "*.{fsa/fa/fasta}" + - ch_monoploid_seqs: + type: file + description: | + Channel for providing a list of monoploid sequences + for correct estimation of LAI for polyploid genomes. + This parameter is useful when all the haplotypes are + stored in a single fasta file. + Structure: [ val(meta), path(txt) ] + pattern: "*.txt" + - skip_lai: + type: boolean + description: | + Skip LAI estimation + Structure: [ val(boolean) ] +output: + - te_lib_fasta: + type: file + description: A non-redundant TE library in fasta format + pattern: "*.EDTA.TElib.fa" + - te_anno_gff3: + type: file + description: A gff3 file containing both structurally intact and fragmented TE annotations + pattern: "*.EDTA.TEanno.gff3" + - lai_log: + type: file + description: | + Log from LAI + Structure: [ val(meta), path(log) ] + pattern: "*.LAI.log" + - lai_out: + type: file + description: | + LAI output + Structure: [ val(meta), path(out) ] + pattern: "*.LAI.out" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test new file mode 100644 index 0000000..e852a70 --- /dev/null +++ b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test @@ -0,0 +1,50 @@ +nextflow_workflow { + + name "Test Workflow FASTA_EDTA_LAI" + script "../main.nf" + workflow "FASTA_EDTA_LAI" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fasta_edta_lai" + tag "fasta_edta_lai" + tag "lai" + tag "edta/edta" + tag "custom/restoregffids" + tag "custom/shortenfastaids" + + 
test("test_data") { + + setup { + run("GUNZIP") { + script "../../../../modules/nf-core/gunzip" + + process { + """ + input[0] = [ + [ id:'test' ], + file('/Users/hrauxr/Projects/nxf-modules/tests/data/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + } + + when { + workflow { + """ + input[0] = GUNZIP.out.gunzip + input[1] = [] + input[2] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.versions).match("versions") } + ) + } + } +} diff --git a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap new file mode 100644 index 0000000..574acc9 --- /dev/null +++ b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap @@ -0,0 +1,11 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,0d4bc49e94acb8995ca552d4e666e3ce", + "versions.yml:md5,754bb19f86be761d90c002a0af2faf1c" + ] + ], + "timestamp": "2023-12-22T14:09:24.171934" + } +} \ No newline at end of file diff --git a/subworkflows/pfr/fasta_edta_lai/tests/tags.yml b/subworkflows/pfr/fasta_edta_lai/tests/tags.yml new file mode 100644 index 0000000..b114c58 --- /dev/null +++ b/subworkflows/pfr/fasta_edta_lai/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fasta_edta_lai: + - subworkflows/pfr/fasta_edta_lai/** diff --git a/workflows/pan_gene.nf b/workflows/pan_gene.nf deleted file mode 100644 index fccbe2e..0000000 --- a/workflows/pan_gene.nf +++ /dev/null @@ -1,406 +0,0 @@ -nextflow.enable.dsl=2 - -include { GUNZIP as GUNZIP_TARGET_ASSEMBLY } from '../modules/nf-core/gunzip' -include { GUNZIP as GUNZIP_TE_LIBRARY } from '../modules/nf-core/gunzip' -include { GUNZIP as GUNZIP_EXTERNAL_PROTEIN_SEQ } from '../modules/nf-core/gunzip' -include { FASTA_VALIDATE } from '../modules/local/fasta_validate' -include { REPEATMASKER } from '../modules/kherronism/repeatmasker' -include { STAR_GENOMEGENERATE } from '../modules/nf-core/star/genomegenerate' -include { CAT_FASTQ } from 
'../modules/nf-core/cat/fastq' -include { SORTMERNA } from '../modules/nf-core/sortmerna' -include { STAR_ALIGN } from '../modules/nf-core/star/align' -include { SAMTOOLS_CAT } from '../modules/nf-core/samtools/cat' -include { CAT_CAT as CAT_PROTEIN_SEQS } from '../modules/nf-core/cat/cat' -include { BRAKER3 } from '../modules/kherronism/braker3' - -include { PERFORM_EDTA_ANNOTATION } from '../subworkflows/local/perform_edta_annotation' -include { EXTRACT_SAMPLES } from '../subworkflows/local/extract_samples' -include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp' - -include { validateParams } from '../modules/local/validate_params' - -validateParams(params) - -// Additional validation -// Check rRNA databases for sortmerna -if (params.sample_prep.remove_ribo_rna) { - ch_ribo_db = file(params.sample_prep.ribo_database_manifest, checkIfExists: true) - if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"} -} - -workflow PAN_GENE { - - // Versions - Channel.empty() - | set { ch_versions } - - // GUNZIP: target_assemblies - Channel.fromList(params.target_assemblies) - | map { tag, filePath -> - [[id:tag], file(filePath, checkIfExists: true)] - } - | branch { meta, file -> - gz: "$file".endsWith(".gz") - rest: !"$file".endsWith(".gz") - } - | set { ch_target_assemblies } - - GUNZIP_TARGET_ASSEMBLY( - ch_target_assemblies.gz - ) - .gunzip - | mix( - ch_target_assemblies.rest - ) - | set { ch_gunzip_target_assemblies } - - ch_versions - | mix(GUNZIP_TARGET_ASSEMBLY.out.versions.first()) - | set { ch_versions } - - // FASTA_VALIDATE - FASTA_VALIDATE(ch_gunzip_target_assemblies) - .valid_fasta - | set { ch_validated_target_assemblies } - - ch_versions - | mix(FASTA_VALIDATE.out.versions.first()) - | set { ch_versions } - - // GUNZIP: te_libraries - Channel.fromList(params.te_libraries) - | map { tag, filePath -> - [[id:tag], file(filePath, checkIfExists: true)] - } - | 
branch { meta, file -> - gz: "$file".endsWith(".gz") - rest: !"$file".endsWith(".gz") - } - | set { ch_te_libraries } - - GUNZIP_TE_LIBRARY( - ch_te_libraries.gz - ) - .gunzip - | mix( - ch_te_libraries.rest - ) - | set { ch_gunzip_te_libraries } - - ch_versions - | mix(GUNZIP_TE_LIBRARY.out.versions.first()) - | set { ch_versions } - - // PERFORM_EDTA_ANNOTATION - ch_validated_target_assemblies - | join( - ch_gunzip_te_libraries, remainder: true - ) - | filter { meta, assembly, teLib -> - teLib == null - } - | map {meta, assembly, teLib -> [meta, assembly]} - | PERFORM_EDTA_ANNOTATION - - ch_versions - | mix(PERFORM_EDTA_ANNOTATION.out.versions) - | set { ch_versions } - - // REPEATMASKER - ch_validated_target_assemblies - | join( - PERFORM_EDTA_ANNOTATION.out.te_lib_fasta.mix(ch_gunzip_te_libraries) - ) - | set { ch_assemblies_n_te_libs } - - REPEATMASKER( - ch_assemblies_n_te_libs.map {meta, assembly, teLib -> [meta, assembly]}, - ch_assemblies_n_te_libs.map {meta, assembly, teLib -> teLib}, - ) - - ch_versions - | mix(REPEATMASKER.out.versions.first()) - | set { ch_versions } - - // STAR_GENOMEGENERATE - def star_ignore_sjdbgtf = true - STAR_GENOMEGENERATE( - REPEATMASKER.out.fasta_masked, - REPEATMASKER.out.fasta_masked.map{meta, maskedFasta -> [meta, []]}, - star_ignore_sjdbgtf - ) - .index - | set { ch_assembly_index } - - ch_versions - | mix(STAR_GENOMEGENERATE.out.versions.first()) - | set { ch_versions } - - // EXTRACT_SAMPLES - // https://github.com/nf-core/rnaseq - // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE - // Changes - // Use meta.id as key for groupTuple as groupTuple does not work when there is a sublist in the key list - ch_samplesheet_path = Channel.empty() - if(params.samplesheet != null) { - ch_samplesheet_path = Channel.fromPath(params.samplesheet) - } - - EXTRACT_SAMPLES( - ch_samplesheet_path, - Channel.of(params.target_assemblies.collect{tag, fastaPath -> tag.strip()}.join(",")) - ) - .reads - | map { meta, fastq -> - 
new_id = meta.id - ~/_T\d+/ - [ new_id, meta + [id: new_id], fastq ] - } - | groupTuple() - | branch { meta_id, meta, fastq -> - single : fastq.size() == 1 - return [ meta.first(), fastq.flatten() ] - multiple: fastq.size() > 1 - return [ meta.first(), fastq.flatten() ] - } - | set { ch_fastq } - - ch_versions - | mix(EXTRACT_SAMPLES.out.versions) - | set { ch_versions } - - // CAT_FASTQ - // https://github.com/nf-core/rnaseq - // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE - CAT_FASTQ ( - ch_fastq.multiple - ) - .reads - | mix(ch_fastq.single) - | set { ch_cat_fastq } - - ch_versions - | mix(CAT_FASTQ.out.versions.first()) - | set { ch_versions } - - // FASTQ_FASTQC_UMITOOLS_FASTP - // https://github.com/nf-core/rnaseq - // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE - def with_umi = false - def skip_umi_extract = true - def umi_discard_read = false - FASTQ_FASTQC_UMITOOLS_FASTP ( - ch_cat_fastq, - params.sample_prep.skip_fastqc, - with_umi, - skip_umi_extract, - umi_discard_read, - params.sample_prep.skip_fastp, - [], - params.sample_prep.save_trimmed, - params.sample_prep.save_trimmed, - params.sample_prep.min_trimmed_reads - ) - .reads - | set { ch_trim_reads } - - // SORTMERNA - if (params.sample_prep.remove_ribo_rna) { - Channel.from(ch_ribo_db.readLines()) - | map { row -> file(row, checkIfExists: true) } - | collect - | set { ch_sortmerna_fastas } - - SORTMERNA ( - ch_trim_reads, - ch_sortmerna_fastas - ) - .reads - | set { ch_trim_reads } - - ch_versions - | mix(SORTMERNA.out.versions.first()) - | set { ch_versions } - } - - ch_trim_reads - | flatMap { meta, reads -> - def targetAssemblies = meta["target_assemblies"] - - readsByAssembly = [] - - for(assembly in targetAssemblies) { - readsByAssembly += [[[id: "${meta.id}.on.${assembly}", single_end: meta.single_end, target_assembly: assembly], reads]] - } - - return readsByAssembly - } - | set { ch_trim_reads_by_assembly } - - ch_versions - | 
mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) - | set { ch_versions } - - // STAR_ALIGN - ch_assembly_index - | map { meta, index -> - [meta.id, index] - } - | cross( - ch_trim_reads_by_assembly.map{meta, reads -> [meta.target_assembly, meta, reads]} - ) - | map { indexWithExt, readsWithExt -> - def index = indexWithExt[1] - - def readsMeta = readsWithExt[1] - def reads = readsWithExt[2] - - [ - readsMeta, - reads, - index - ] - } - | set { ch_trim_reads_by_assembly_with_index } - - def seq_platform = false - def seq_center = false - STAR_ALIGN( - ch_trim_reads_by_assembly_with_index.map{meta, reads, index -> [meta, reads]}, - ch_trim_reads_by_assembly_with_index.map{meta, reads, index -> [[id: meta.target_assembly], index]}, - ch_trim_reads_by_assembly_with_index.map{meta, reads, index -> [[id: meta.target_assembly], []]}, - star_ignore_sjdbgtf, - seq_platform, - seq_center - ) - .bam_sorted - .tap { ch_mapped_reads } - .map { meta, bam -> - [ - [id: meta.target_assembly], - bam instanceof List ? 
bam.find {it =~ /Aligned/} : bam - ] - } - | groupTuple - | set { ch_mapped_reads_by_assembly } - - ch_versions - | mix(STAR_ALIGN.out.versions.first()) - | set { ch_versions } - - // SAMTOOLS_CAT - ch_mapped_reads_by_assembly - | branch { meta, bamList -> - bams: bamList.size() > 1 - bam: bamList.size() <= 1 - } - | set { ch_samtools_cat_inputs_branches } - - SAMTOOLS_CAT( - ch_samtools_cat_inputs_branches.bams - ) - .bam - | map { meta, bam -> - [ - meta, - [bam] - ] - } - | mix( - ch_samtools_cat_inputs_branches.bam - ) - | set { ch_cat_bam_by_assembly } - - ch_versions - | mix(SAMTOOLS_CAT.out.versions.first()) - | set { ch_versions } - - // GUNZIP: external_protein_seqs - ch_external_protein_seqs = Channel.empty() - if(params.external_protein_seqs != null) { - ch_external_protein_seqs = Channel.fromList(params.external_protein_seqs) - } - - ch_external_protein_seqs - | map { filePath -> - def fileHandle = file(filePath, checkIfExists: true) - [[id:fileHandle.getSimpleName()], fileHandle] - } - | branch { meta, file -> - gz: "$file".endsWith(".gz") - rest: !"$file".endsWith(".gz") - } - | set { ch_external_protein_seqs_branch } - - GUNZIP_EXTERNAL_PROTEIN_SEQ( - ch_external_protein_seqs_branch.gz - ) - .gunzip - | mix( - ch_external_protein_seqs_branch.rest - ) - | set { ch_gunzip_external_protein_seqs } - - ch_versions - | mix(GUNZIP_EXTERNAL_PROTEIN_SEQ.out.versions.first()) - | set { ch_versions } - - // CAT_PROTEIN_SEQS - ch_gunzip_external_protein_seqs - | map{meta, filePath -> filePath} - | collect - | map{fileList -> [[id:"protein_seqs"], fileList]} - | CAT_PROTEIN_SEQS - - CAT_PROTEIN_SEQS.out.file_out - | set { ch_protein_seq } - - ch_versions - | mix(CAT_PROTEIN_SEQS.out.versions) - | set { ch_versions } - - // BRAKER3 - REPEATMASKER.out.fasta_masked - | mix(ch_cat_bam_by_assembly) - | groupTuple(size: 2, remainder: true) - | map { meta, groupedItems -> - def maskedFasta = groupedItems[0] - - if(groupedItems.size() == 2) { - def bam = groupedItems[1] 
- return [meta, maskedFasta, bam] - } else { - return [meta, maskedFasta, []] - } - } - | set { ch_braker_inputs } - - if(params.external_protein_seqs) { - ch_braker_inputs - | combine(ch_protein_seq.map{meta, filePath -> filePath}) - | set { ch_braker_inputs } - } else { - ch_braker_inputs - | map{meta, assembly, bams -> [meta, assembly, bams, []]} - | set { ch_braker_inputs } - } - - ch_fasta = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> [meta, assembly]} - ch_bam = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> bams} - ch_rnaseq_sets_dirs = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> []} - ch_rnaseq_sets_ids = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> []} - ch_proteins = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> proteinSeq} - ch_hintsfile = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> []} - - BRAKER3( - ch_fasta, - ch_bam, - ch_rnaseq_sets_dirs, - ch_rnaseq_sets_ids, - ch_proteins, - ch_hintsfile - ) - - ch_versions - | mix(BRAKER3.out.versions.first()) - | set { ch_versions } -} \ No newline at end of file diff --git a/workflows/pangene.nf b/workflows/pangene.nf new file mode 100644 index 0000000..8512ff9 --- /dev/null +++ b/workflows/pangene.nf @@ -0,0 +1,150 @@ +include { validateParams } from '../modules/local/validate_params' +include { PREPARE_ASSEMBLY } from '../subworkflows/local/prepare_assembly' +include { PREPROCESS_RNASEQ } from '../subworkflows/local/preprocess_rnaseq' +include { ALIGN_RNASEQ } from '../subworkflows/local/align_rnaseq' +include { PREPARE_EXT_PROTS } from '../subworkflows/local/prepare_ext_prots' +include { BRAKER3 } from '../modules/kherronism/braker3' +include { FASTA_LIFTOFF } from '../subworkflows/local/fasta_liftoff' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions' + +validateParams(params) + +workflow PANGENE { + + ch_versions = Channel.empty() + + ch_target_assembly = 
Channel.fromList(params.target_assemblies) + | map { tag, filePath -> + [ [ id: tag ], file(filePath, checkIfExists: true) ] + } + + ch_te_library = Channel.fromList(params.te_libraries) + | map { tag, filePath -> + [ [ id:tag ], file(filePath, checkIfExists: true) ] + } + + ch_samplesheet = params.samplesheet + ? Channel.fromPath(params.samplesheet, checkIfExists: true) + : Channel.empty() + + ch_tar_assm_str = Channel.of( + params.target_assemblies + .collect { tag, fastaPath -> tag.strip() }.join(",") + ) + + ch_ribo_db = params.remove_ribo_rna + ? file(params.ribo_database_manifest, checkIfExists: true) + : null + + ch_sortmerna_fastas = ch_ribo_db + ? Channel.from(ch_ribo_db ? ch_ribo_db.readLines() : null) + | map { row -> file(row, checkIfExists: true) } + | collect + : Channel.empty() + + ch_ext_prot_fastas = params.external_protein_fastas + ? Channel.fromList(params.external_protein_fastas) + | map { filePath -> + def fileHandle = file(filePath, checkIfExists: true) + [ [ id: fileHandle.getSimpleName() ], fileHandle] + } + : Channel.empty() + + ch_xref_mm = params.liftoff_xref_annotations + ? 
Channel.fromList(params.liftoff_xref_annotations) + | multiMap { fasta, gff -> + def fastaFile = file(fasta, checkIfExists:true) + + fasta: [ [ id: fastaFile.getSimpleName() ], fastaFile ] + gff: [ [ id: fastaFile.getSimpleName() ], file(gff, checkIfExists:true) ] + } + : Channel.empty() + + ch_xref_fasta = ch_xref_mm.fasta + ch_xref_gff = ch_xref_mm.gff + + // SUBWORKFLOW: PREPARE_ASSEMBLY + PREPARE_ASSEMBLY( + ch_target_assembly, + ch_te_library, + params.repeat_annotator + ) + + ch_valid_target_assembly = PREPARE_ASSEMBLY.out.target_assemby + ch_masked_target_assembly = PREPARE_ASSEMBLY.out.masked_target_assembly + ch_target_assemby_index = PREPARE_ASSEMBLY.out.target_assemby_index + ch_versions = ch_versions.mix(PREPARE_ASSEMBLY.out.versions) + + // SUBWORKFLOW: PREPROCESS_RNASEQ + PREPROCESS_RNASEQ( + ch_samplesheet, + ch_tar_assm_str, + params.skip_fastqc, + params.skip_fastp, + params.save_trimmed, + params.min_trimmed_reads, + params.remove_ribo_rna, + ch_sortmerna_fastas + ) + + ch_trim_reads = PREPROCESS_RNASEQ.out.trim_reads + ch_reads_target = PREPROCESS_RNASEQ.out.reads_target + ch_versions = ch_versions.mix(PREPROCESS_RNASEQ.out.versions) + + // SUBWORKFLOW: ALIGN_RNASEQ + ALIGN_RNASEQ( + ch_reads_target, + ch_trim_reads, + ch_target_assemby_index + ) + + ch_rnaseq_bam = ALIGN_RNASEQ.out.bam + ch_versions = ch_versions.mix(ALIGN_RNASEQ.out.versions) + + // MODULE: PREPARE_EXT_PROTS + PREPARE_EXT_PROTS( + ch_ext_prot_fastas + ) + + ch_ext_prots_fasta = PREPARE_EXT_PROTS.out.ext_prots_fasta + ch_versions = ch_versions.mix(PREPARE_EXT_PROTS.out.versions) + + // MODULE: BRAKER3 + ch_braker_inputs = ch_masked_target_assembly + | join(ch_rnaseq_bam, remainder: true) + | combine( + ch_ext_prots_fasta.map { meta, filePath -> filePath }.ifEmpty(null) + ) + | map { meta, fasta, bam, prots -> [ meta, fasta, bam ?: [], prots ?: [] ] } + + def rnaseq_sets_dirs = [] + def rnaseq_sets_ids = [] + def hintsfile = [] + + BRAKER3( + ch_braker_inputs.map { meta, fasta, 
bam, prots -> [meta, fasta] }, + ch_braker_inputs.map { meta, fasta, bam, prots -> bam }, + rnaseq_sets_dirs, + rnaseq_sets_ids, + ch_braker_inputs.map { meta, fasta, bam, prots -> prots }, + hintsfile + ) + + ch_braker_gff3 = BRAKER3.out.gff3 + ch_versions = ch_versions.mix(BRAKER3.out.versions.first()) + + // SUBWORKFLOW: FASTA_LIFTOFF + FASTA_LIFTOFF( + ch_valid_target_assembly, + ch_xref_fasta, + ch_xref_gff + ) + + ch_liftoff_gff3 = FASTA_LIFTOFF.out.gff3 + ch_versions = ch_versions.mix(FASTA_LIFTOFF.out.versions) + + // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) +}
Process Name \\", + " \\ Software Version
CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
yaml5.4.1
TOOL1tool10.11.9
TOOL2tool21.9
WorkflowNextflow12.922000 K (92.984097%)", + "single end (151 cycles)" ] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_single_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:true ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + + assertAll( + { assert process.success }, + { + assert 
snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { html_text.each { html_part -> + { 
assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + 
).match("test_fastp_paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("fastp test_fastp_interleaved") { + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "paired end (151 cycles + 151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 198"] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + 
).sort() + ).match("test_fastp_interleaved-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("fastp test_fastp_interleaved-stub") { + + options '-stub' + + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + 
"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { failed_read_lines.each { failed_read_line -> + { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + 
"AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { failed_read2_lines.each { failed_read2_line -> + { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "
"] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683'] + def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } 
+ process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true) + save_trimmed_fail = false + save_merged = true + + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "
"] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"] + def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 0000000..1b7d241 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,107 @@ +{ + "test_fastp_paired_end-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=false}" + ] + ], + "timestamp": "2023-12-21T09:44:37.202512" + }, + "fastp test_fastp_interleaved_json": { + "content": [ + [ + [ + { + "id": "test", + 
"single_end": true + }, + "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4" + ] + ] + ], + "timestamp": "2023-10-17T11:04:45.794175881" + }, + "test_fastp_paired_end_merged-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "test.merged.fastq.gz", + "{id=test, single_end=false}" + ] + ], + "timestamp": "2023-12-21T09:53:45.237014" + }, + "test_fastp_single_end_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + ] + ] + ], + "timestamp": "2023-10-17T11:04:10.566343705" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "timestamp": "2023-10-17T11:04:10.582076024" + }, + "test_fastp_interleaved-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "timestamp": "2023-12-21T09:48:43.148485" + }, + "test_fastp_single_end-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "timestamp": "2023-12-21T09:20:07.254788" + }, + "test_fastp_single_end_trim_fail_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + ] + ] + ], + "timestamp": "2023-10-17T11:05:00.379878948" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.config new file mode 100644 index 0000000..0f7849a --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: FASTP { + ext.args = "--interleaved_in" + } +} diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml new file mode 100644 index 
0000000..c1afcce --- /dev/null +++ b/modules/nf-core/fastp/tests/tags.yml @@ -0,0 +1,2 @@ +fastp: + - modules/nf-core/fastp/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 0000000..1787b38 --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 249f906..9e19a74 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5..ee5507e 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test 
b/modules/nf-core/fastqc/tests/main.nf.test index 3961de6..ad9bc54 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -1,32 +1,220 @@ nextflow_process { name "Test Process FASTQC" - script "modules/nf-core/fastqc/main.nf" + script "../main.nf" process "FASTQC" + + tag "modules" + tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { process { """ input[0] = [ - [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] ] """ } } then { - assert process.success - assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" - assert path(process.out.html.get(0).get(1)).getText().contains("
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls