From ee9d0b9c56883697807ff1b4e8fb847ae0bab419 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 7 Nov 2023 12:51:46 +1300
Subject: [PATCH 01/59] Added updated info about nf-core modules

---
 modules/nf-core/CHANGELOG.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/modules/nf-core/CHANGELOG.md b/modules/nf-core/CHANGELOG.md
index 1b886a1..2721570 100644
--- a/modules/nf-core/CHANGELOG.md
+++ b/modules/nf-core/CHANGELOG.md
@@ -23,4 +23,6 @@
 ### sortmerna
 
 1. Added stub
-2. Added author in meta.yml
\ No newline at end of file
+2. Added author in meta.yml
+
+- Repo: https://github.com/nf-core/modules/tree/4e2cbac1db88f544711e488e552175368ca14588
\ No newline at end of file

From 7f03697dfe00ae5dd7d18ead8c293117e638b6c8 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 7 Nov 2023 13:00:55 +1300
Subject: [PATCH 02/59] Turned off SortMeRNA by default

---
 TODO.md         | 3 ++-
 nextflow.config | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/TODO.md b/TODO.md
index a9be74b..d243565 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,4 +1,5 @@
 - [ ] Rename perform_edta_annotation to FASTA_PERFORM_EDTA
 - [ ] Extract subworkflows
 - [ ] STAR ignores softmasking and, thus, should be fed the unmasked genome so that masking and mapping can run in parallel.
-- [ ] Add --eval=reference.gtf
\ No newline at end of file
+- [ ] Add --eval=reference.gtf
+- [ ] Replace quay containers with galaxyproject cache containers.
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index a624175..3a60fdb 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -48,7 +48,7 @@ params {
         // toggling this parameter results in rerun of FASTP and FASTQC_TRIM
         save_trimmed            = false
 
-        remove_ribo_rna         = true
+        remove_ribo_rna         = false
         save_non_ribo_reads     = false
         ribo_database_manifest  = "${projectDir}/assets/rrna-db-defaults.txt"
     }

From 232493a7fc52aab9862b1825a0447e561d921f03 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 8 Nov 2023 13:55:31 +1300
Subject: [PATCH 03/59] Decouple target assemblies and read qc/align

---
 .gitignore                            |  15 --
 README.md                             |   5 +-
 conf/modules.config                   |  18 +-
 modules/nf-core/fastp/main.nf         |   8 +-
 nextflow.config                       |  20 ++-
 subworkflows/local/extract_samples.nf |  23 ++-
 workflows/pan_gene.nf                 | 233 ++++++++++----------------
 7 files changed, 128 insertions(+), 194 deletions(-)

diff --git a/.gitignore b/.gitignore
index cc3b658..6e9d9d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,24 +1,9 @@
 .DS_Store
-
 *.pyc
 __pycahce__
-
-nextflow
 .nextflow*
 work/
-*.dot
-
-Results/
 results/
-report/
-Report/
-
-*.log
-.nfs*
-
-*.sif
-
-pan_gene_slurm.sh
 *.stdout
 *.stderr
 
diff --git a/README.md b/README.md
index 2eaf965..7dbf45e 100644
--- a/README.md
+++ b/README.md
@@ -112,5 +112,6 @@ Some software components of this pipeline have been adopted from following third
 >
 > _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).
 
-2. rewarewaannotation [MIT](https://github.com/kherronism/rewarewaannotation/blob/master/LICENSE): https://github.com/kherronism/rewarewaannotation
-3. assembly_qc [GPL-3.0](https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE): https://github.com/Plant-Food-Research-Open/assembly_qc
\ No newline at end of file
+2. nf-core/rnaseq [MIT](https://github.com/nf-core/rnaseq/blob/master/LICENSE): https://github.com/nf-core/rnaseq
+3. rewarewaannotation [MIT](https://github.com/kherronism/rewarewaannotation/blob/master/LICENSE): https://github.com/kherronism/rewarewaannotation
+4. assembly_qc [GPL-3.0](https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE): https://github.com/Plant-Food-Research-Open/assembly_qc
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
index 66205cb..0cd7bb5 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -31,8 +31,6 @@ process {
     }
 }
 
-// https://github.com/nf-core/rnaseq
-// MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
 if(!params.sample_prep.skip_fastqc) {
     process {
         withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_RAW' {
@@ -50,17 +48,20 @@ if(!params.sample_prep.skip_fastqc) {
     }
 }
 
-// https://github.com/nf-core/rnaseq
-// MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
 if(!params.sample_prep.skip_fastp) {
     process {
         withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTP' {
             ext.args   = params.sample_prep.extra_fastp_args ?: ''
             publishDir = [
                 [
-                    path: { "${params.outdir}/fastp" },
+                    path: { "${params.outdir}/fastp/html" },
                     mode:  "copy",
-                    pattern: "*.{json,html}"
+                    pattern: "*.{html}"
+                ],
+                [
+                    path: { "${params.outdir}/fastp/json" },
+                    mode:  "copy",
+                    pattern: "*.{json}"
                 ],
                 [
                     path: { "${params.outdir}/fastp/log" },
@@ -99,12 +100,7 @@ if (params.sample_prep.remove_ribo_rna) {
     }
 }
 
-// https://github.com/kherronism/rewarewaannotation
-// MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
 process {
-
-    // Changes:
-    // Introduced additional defaults 
     withName: STAR_ALIGN {
         ext.args = [
             "--outSAMstrandField intronMotif",
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
index ee38e1d..9c747d3 100644
--- a/modules/nf-core/fastp/main.nf
+++ b/modules/nf-core/fastp/main.nf
@@ -101,12 +101,16 @@ process FASTP {
     }
 
     stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    def prefix              = task.ext.prefix ?: "${meta.id}"
+    def isSingleOutput      = task.ext.args?.contains('--interleaved_in') || meta.single_end
+    def outputFiles         = isSingleOutput ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz"
+    def mergedFileCommand   = (!isSingleOutput && save_merged) ? "touch ${prefix}.merged.fastq.gz" : ""
     """
-    touch "${prefix}.fastp.fastq.gz"
+    touch $outputFiles
     touch "${prefix}.json"
     touch "${prefix}.html"
     touch "${prefix}.log"
+    $mergedFileCommand
     
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/nextflow.config b/nextflow.config
index 3a60fdb..ec56979 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,6 +1,10 @@
 includeConfig './conf/base.config'
 
 params {
+    target_assemblies           = [
+        ["red5_v2p1", "/workspace/hrauxr/pan-gene/.test/red5_v2p1_chr1.fasta"],
+        ["donghong", "/workspace/hrauxr/pan-gene/.test/donghong.chr1.fsa.gz"]
+    ]
     // FASTA files (fasta, fasta.gz) for the assemblies to annotate
     //
     // Pattern:               [["tag", "file path"]]
@@ -14,10 +18,10 @@ params {
     //                              ["tag2", "./a/relative/path/to/the/fasta/file2.fasta"],
     //                              ["tag3", "https://ftp.ncbi.nlm.nih.gov/genomes/test_genome.fna"], ...]
     // target_assemblies        = [["tair10", "/an/absolute/path/to/the/fasta/file.fasta"]]
-    target_assemblies           = [
-        ["red5_v2p1", "/workspace/hrauxr/pan-gene/.test/red5_v2p1_chr1.fasta"]
+    
+    te_libraries                = [
+        ["donghong", "/workspace/hrauxr/pan-gene/.test/donghong.TElib.fa.gz"]
     ]
-
     // TE libs (fasta, fasta.gz) for target_assemblies
     //
     // Optional                 Set to [] if libraries are not available, te_libraries = []
@@ -26,8 +30,6 @@ params {
     // Not all target_assemblies need to have an associated (by tag) TE library.
     // When the TE lib is not available for a traget assembly, EDTA is used to create one.
     
-    te_libraries                = []
-    
     edta {
         is_sensitive            = false
         save_outputs            = true
@@ -36,8 +38,8 @@ params {
         save_outputs            = true
     }
     
-    // Optional: Set to null if not available
     samplesheet                 = "./.test/samplesheet.csv"
+    // Optional: Set to null if not available
 
     sample_prep {
         skip_fastqc             = false
@@ -45,8 +47,8 @@ params {
         min_trimmed_reads       = 10000
         extra_fastp_args        = ""
 
-        // toggling this parameter results in rerun of FASTP and FASTQC_TRIM
         save_trimmed            = false
+        // toggling this parameter results in rerun of FASTP and FASTQC_TRIM
 
         remove_ribo_rna         = false
         save_non_ribo_reads     = false
@@ -59,17 +61,17 @@ params {
         save_outputs            = false
     }
 
-    // Optional: Set to null if not available
     external_protein_seqs       = [
         "/workspace/ComparativeDataSources/OrthoDB11/Viridiplantae.fa.gz",
         "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.pep.fasta"
     ]
+    // Optional: Set to null if not available
 
     braker {
         extra_braker_args       = ""
     }
 
-    outdir                  = "./results"
+    outdir                      = "./results"
 
     max_cpus                = 12
     max_memory              = 200.GB
diff --git a/subworkflows/local/extract_samples.nf b/subworkflows/local/extract_samples.nf
index f3c1a15..d05de4c 100644
--- a/subworkflows/local/extract_samples.nf
+++ b/subworkflows/local/extract_samples.nf
@@ -7,6 +7,8 @@ nextflow.enable.dsl=2
 // Added channel permissible_target_assemblies
 // Changed file name from input_check.nf to extract_samples.nf
 // Removed strandedness
+// Nowing emitting an extra channel 'assemblies' which indicates the
+// assemblies targeted by each read
 //
 // Check input samplesheet and get read channels
 //
@@ -20,14 +22,23 @@ workflow EXTRACT_SAMPLES {
 
     main:
     SAMPLESHEET_CHECK ( samplesheet, permissible_target_assemblies )
-        .csv
-        .splitCsv ( header:true, sep:',' )
-        .map { create_fastq_channel(it) }
-        .set { reads }
+    .csv
+    | splitCsv ( header:true, sep:',' )
+    | map { create_fastq_channel(it) }
+    | set { ch_reads }
 
+    reads = ch_reads.map { meta, fastq -> [[id:meta.id, single_end:meta.single_end], fastq]}
+    
+    ch_reads
+    | flatMap { meta, fastq ->
+        meta.target_assemblies.collect { assembly -> [[id:meta.id, single_end:meta.single_end], assembly] }
+    }
+    | set { assemblies }
+    
     emit:
-    reads                                     // channel: [ val(meta), [ reads ] ]
-    versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
+    reads                                       // channel: [ val(meta), [ reads ] ]
+    assemblies                                  // channel: [ val(meta), val(assembly) ]
+    versions = SAMPLESHEET_CHECK.out.versions   // channel: [ versions.yml ]
 }
 
 // Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
diff --git a/workflows/pan_gene.nf b/workflows/pan_gene.nf
index fccbe2e..46dd112 100644
--- a/workflows/pan_gene.nf
+++ b/workflows/pan_gene.nf
@@ -31,10 +31,9 @@ if (params.sample_prep.remove_ribo_rna) {
 workflow PAN_GENE {
 
     // Versions
-    Channel.empty()
-    | set { ch_versions }
+    ch_versions = Channel.empty()
     
-    // GUNZIP: target_assemblies
+    // MODULE: GUNZIP_TARGET_ASSEMBLY
     Channel.fromList(params.target_assemblies)
     | map { tag, filePath ->
         [[id:tag], file(filePath, checkIfExists: true)]
@@ -54,20 +53,16 @@ workflow PAN_GENE {
     )
     | set { ch_gunzip_target_assemblies }
 
-    ch_versions
-    | mix(GUNZIP_TARGET_ASSEMBLY.out.versions.first())
-    | set { ch_versions }
+    ch_versions = ch_versions.mix(GUNZIP_TARGET_ASSEMBLY.out.versions.first())
 
-    // FASTA_VALIDATE
+    // MODULE: FASTA_VALIDATE
     FASTA_VALIDATE(ch_gunzip_target_assemblies)
     .valid_fasta
     | set { ch_validated_target_assemblies }
 
-    ch_versions
-    | mix(FASTA_VALIDATE.out.versions.first())
-    | set { ch_versions }
+    ch_versions = ch_versions.mix(FASTA_VALIDATE.out.versions.first())
 
-    // GUNZIP: te_libraries
+    // MODULE: GUNZIP_TE_LIBRARY
     Channel.fromList(params.te_libraries)
     | map { tag, filePath ->
         [[id:tag], file(filePath, checkIfExists: true)]
@@ -87,11 +82,9 @@ workflow PAN_GENE {
     )
     | set { ch_gunzip_te_libraries }
 
-    ch_versions
-    | mix(GUNZIP_TE_LIBRARY.out.versions.first())
-    | set { ch_versions }
+    ch_versions = ch_versions.mix(GUNZIP_TE_LIBRARY.out.versions.first())
 
-    // PERFORM_EDTA_ANNOTATION
+    // SUBWORKFLOW: PERFORM_EDTA_ANNOTATION
     ch_validated_target_assemblies
     | join(
         ch_gunzip_te_libraries, remainder: true
@@ -102,11 +95,9 @@ workflow PAN_GENE {
     | map {meta, assembly, teLib -> [meta, assembly]}
     | PERFORM_EDTA_ANNOTATION
 
-    ch_versions
-    | mix(PERFORM_EDTA_ANNOTATION.out.versions)
-    | set { ch_versions }
+    ch_versions = ch_versions.mix(PERFORM_EDTA_ANNOTATION.out.versions)
     
-    // REPEATMASKER
+    // MODULE: REPEATMASKER
     ch_validated_target_assemblies
     | join(
         PERFORM_EDTA_ANNOTATION.out.te_lib_fasta.mix(ch_gunzip_te_libraries)
@@ -118,11 +109,9 @@ workflow PAN_GENE {
         ch_assemblies_n_te_libs.map {meta, assembly, teLib -> teLib},
     )
 
-    ch_versions
-    | mix(REPEATMASKER.out.versions.first())
-    | set { ch_versions }
+    ch_versions = ch_versions.mix(REPEATMASKER.out.versions.first())
 
-    // STAR_GENOMEGENERATE
+    // MODULE: STAR_GENOMEGENERATE
     def star_ignore_sjdbgtf = true
     STAR_GENOMEGENERATE(
         REPEATMASKER.out.fasta_masked,
@@ -132,15 +121,9 @@ workflow PAN_GENE {
     .index
     | set { ch_assembly_index }
 
-    ch_versions
-    | mix(STAR_GENOMEGENERATE.out.versions.first())
-    | set { ch_versions }
+    ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions.first())
 
-    // EXTRACT_SAMPLES
-    // https://github.com/nf-core/rnaseq
-    // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
-    // Changes
-    // Use meta.id as key for groupTuple as groupTuple does not work when there is a sublist in the key list
+    // SUBWORKFLOW: EXTRACT_SAMPLES
     ch_samplesheet_path = Channel.empty()
     if(params.samplesheet != null) {
         ch_samplesheet_path = Channel.fromPath(params.samplesheet)
@@ -152,25 +135,22 @@ workflow PAN_GENE {
     )
     .reads
     | map { meta, fastq ->
-        new_id = meta.id - ~/_T\d+/
-        [ new_id, meta + [id: new_id], fastq ]
+        groupID = meta.id - ~/_T\d+/
+        [ meta + [id: groupID], fastq ]
     }
     | groupTuple()
-    | branch { meta_id, meta, fastq ->
+    | branch { meta, fastq ->
         single  : fastq.size() == 1
-            return [ meta.first(), fastq.flatten() ]
+            return [ meta, fastq.flatten() ]
         multiple: fastq.size() > 1
-            return [ meta.first(), fastq.flatten() ]
+            return [ meta, fastq.flatten() ]
     }
     | set { ch_fastq }
 
-    ch_versions
-    | mix(EXTRACT_SAMPLES.out.versions)
-    | set { ch_versions }
+    ch_read_target_assemblies = EXTRACT_SAMPLES.out.assemblies
+    ch_versions = ch_versions.mix(EXTRACT_SAMPLES.out.versions)
 
-    // CAT_FASTQ
-    // https://github.com/nf-core/rnaseq
-    // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
+    // MODULES: CAT_FASTQ
     CAT_FASTQ (
         ch_fastq.multiple
     )
@@ -178,13 +158,9 @@ workflow PAN_GENE {
     | mix(ch_fastq.single)
     | set { ch_cat_fastq }
     
-    ch_versions
-    | mix(CAT_FASTQ.out.versions.first())
-    | set { ch_versions }
+    ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first())
 
-    // FASTQ_FASTQC_UMITOOLS_FASTP
-    // https://github.com/nf-core/rnaseq
-    // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
+    // SUBWORKFLOW: FASTQ_FASTQC_UMITOOLS_FASTP
     def with_umi            = false
     def skip_umi_extract    = true
     def umi_discard_read    = false
@@ -203,7 +179,9 @@ workflow PAN_GENE {
     .reads
     | set { ch_trim_reads }
 
-    // SORTMERNA
+    ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
+
+    // MODULE: SORTMERNA
     if (params.sample_prep.remove_ribo_rna) {
         Channel.from(ch_ribo_db.readLines())
         | map { row -> file(row, checkIfExists: true) }
@@ -217,110 +195,75 @@ workflow PAN_GENE {
         .reads
         | set { ch_trim_reads }
 
-        ch_versions
-        | mix(SORTMERNA.out.versions.first())
-        | set { ch_versions }
+        ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
     }
 
-    ch_trim_reads
-    | flatMap { meta, reads ->
-        def targetAssemblies = meta["target_assemblies"]
-
-        readsByAssembly = []
-
-        for(assembly in targetAssemblies) {
-            readsByAssembly += [[[id: "${meta.id}.on.${assembly}", single_end: meta.single_end, target_assembly: assembly], reads]]
-        }
-
-        return readsByAssembly
+    // MODULE: STAR_ALIGN
+    ch_read_target_assemblies
+    | map { meta, assembly ->
+        groupID = meta.id - ~/_T\d+/
+        [ meta + [id: groupID], assembly ]
     }
-    | set { ch_trim_reads_by_assembly }
-
-    ch_versions
-    | mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
-    | set { ch_versions }
-
-    // STAR_ALIGN
-    ch_assembly_index
-    | map { meta, index ->
-        [meta.id, index]
+    | unique
+    | combine(ch_trim_reads, by:0)
+    | map { meta, assembly, fastq ->
+        [assembly, [id:"${meta.id}.on.${assembly}", single_end:meta.single_end, target_assembly:assembly], fastq]
     }
-    | cross(
-        ch_trim_reads_by_assembly.map{meta, reads -> [meta.target_assembly, meta, reads]}
+    | combine(
+        ch_assembly_index.map { meta, index -> [meta.id, index] },
+        by:0
     )
-    | map { indexWithExt, readsWithExt ->
-        def index = indexWithExt[1]
-
-        def readsMeta = readsWithExt[1]
-        def reads = readsWithExt[2]
-
-        [
-            readsMeta,
-            reads,
-            index
-        ]
-    }
-    | set { ch_trim_reads_by_assembly_with_index }
+    | map { assembly, meta, fastq, index -> [meta, fastq, index] }
+    | set { ch_star_inputs }
 
     def seq_platform = false
     def seq_center = false
     STAR_ALIGN(
-        ch_trim_reads_by_assembly_with_index.map{meta, reads, index -> [meta, reads]},
-        ch_trim_reads_by_assembly_with_index.map{meta, reads, index -> [[id: meta.target_assembly], index]},
-        ch_trim_reads_by_assembly_with_index.map{meta, reads, index -> [[id: meta.target_assembly], []]},
+        ch_star_inputs.map{meta, fastq, index -> [meta, fastq]},
+        ch_star_inputs.map{meta, fastq, index -> [[id: meta.target_assembly], index]},
+        ch_star_inputs.map{meta, fastq, index -> [[id: meta.target_assembly], []]},
         star_ignore_sjdbgtf,
         seq_platform,
         seq_center
     )
     .bam_sorted
-    .tap { ch_mapped_reads }
-    .map { meta, bam ->
+    | set { ch_star_bam }
+
+    ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first())
+
+    // MODULE: SAMTOOLS_CAT
+    ch_star_bam
+    | map { meta, bam ->
         [
             [id: meta.target_assembly],
             bam instanceof List ? bam.find {it =~ /Aligned/} : bam
         ]
     }
     | groupTuple
-    | set { ch_mapped_reads_by_assembly }
-
-    ch_versions
-    | mix(STAR_ALIGN.out.versions.first())
-    | set { ch_versions }
-
-    // SAMTOOLS_CAT
-    ch_mapped_reads_by_assembly
     | branch { meta, bamList ->
         bams: bamList.size() > 1
         bam: bamList.size() <= 1
     }
-    | set { ch_samtools_cat_inputs_branches }
+    | set { ch_star_bam_branch }
 
     SAMTOOLS_CAT(
-        ch_samtools_cat_inputs_branches.bams
+        ch_star_bam_branch.bams
     )
-    .bam
-    | map { meta, bam ->
-        [
-            meta,
-            [bam]
-        ]
-    }
+    .bam.map { meta, bam -> [meta, [bam]] }
     | mix(
-        ch_samtools_cat_inputs_branches.bam
+        ch_star_bam_branch.bam
     )
-    | set { ch_cat_bam_by_assembly }
+    | set { ch_samtools_bam }
 
-    ch_versions
-    | mix(SAMTOOLS_CAT.out.versions.first())
-    | set { ch_versions }
+    ch_versions = ch_versions.mix(SAMTOOLS_CAT.out.versions.first())
 
-    // GUNZIP: external_protein_seqs
-    ch_external_protein_seqs = Channel.empty()
+    // MODULE: GUNZIP_EXTERNAL_PROTEIN_SEQ
+    ch_ext_prot_seqs = Channel.empty()
     if(params.external_protein_seqs != null) {
-        ch_external_protein_seqs = Channel.fromList(params.external_protein_seqs)
+        ch_ext_prot_seqs = Channel.fromList(params.external_protein_seqs)
     }
     
-    ch_external_protein_seqs
+    ch_ext_prot_seqs
     | map { filePath ->
         def fileHandle = file(filePath, checkIfExists: true)
         [[id:fileHandle.getSimpleName()], fileHandle]
@@ -329,38 +272,32 @@ workflow PAN_GENE {
         gz: "$file".endsWith(".gz")
         rest: !"$file".endsWith(".gz")
     }
-    | set { ch_external_protein_seqs_branch }
+    | set { ch_ext_prot_seqs_branch }
 
     GUNZIP_EXTERNAL_PROTEIN_SEQ(
-        ch_external_protein_seqs_branch.gz
+        ch_ext_prot_seqs_branch.gz
     )
     .gunzip
     | mix(
-        ch_external_protein_seqs_branch.rest
+        ch_ext_prot_seqs_branch.rest
     )
-    | set { ch_gunzip_external_protein_seqs }
+    | set { ch_ext_prot_seqs }
 
-    ch_versions
-    | mix(GUNZIP_EXTERNAL_PROTEIN_SEQ.out.versions.first())
-    | set { ch_versions }
+    ch_versions = ch_versions.mix(GUNZIP_EXTERNAL_PROTEIN_SEQ.out.versions.first())
 
-    // CAT_PROTEIN_SEQS
-    ch_gunzip_external_protein_seqs
-    | map{meta, filePath -> filePath}
+    // MODULE: CAT_PROTEIN_SEQS
+    ch_ext_prot_seqs
+    | map{ meta, filePath -> filePath }
     | collect
-    | map{fileList -> [[id:"protein_seqs"], fileList]}
+    | map{ fileList -> [[id:"protein_seqs"], fileList] }
     | CAT_PROTEIN_SEQS
     
-    CAT_PROTEIN_SEQS.out.file_out
-    | set { ch_protein_seq }
-
-    ch_versions
-    | mix(CAT_PROTEIN_SEQS.out.versions)
-    | set { ch_versions }
+    ch_ext_prot_seqs = CAT_PROTEIN_SEQS.out.file_out
+    ch_versions = ch_versions.mix(CAT_PROTEIN_SEQS.out.versions)
 
-    // BRAKER3
+    // MODULE: BRAKER3
     REPEATMASKER.out.fasta_masked
-    | mix(ch_cat_bam_by_assembly)
+    | mix(ch_samtools_bam)
     | groupTuple(size: 2, remainder: true)
     | map { meta, groupedItems ->
         def maskedFasta = groupedItems[0]
@@ -376,20 +313,20 @@ workflow PAN_GENE {
     
     if(params.external_protein_seqs) {
         ch_braker_inputs
-        | combine(ch_protein_seq.map{meta, filePath -> filePath})
+        | combine(ch_ext_prot_seqs.map{meta, filePath -> filePath})
         | set { ch_braker_inputs }
     } else {
         ch_braker_inputs
-        | map{meta, assembly, bams -> [meta, assembly, bams, []]}
+        | map{meta, assembly, bam -> [meta, assembly, bam, []]}
         | set { ch_braker_inputs }
     }
     
-    ch_fasta            = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> [meta, assembly]}
-    ch_bam              = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> bams}
-    ch_rnaseq_sets_dirs = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> []}
-    ch_rnaseq_sets_ids  = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> []}
-    ch_proteins         = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> proteinSeq}
-    ch_hintsfile        = ch_braker_inputs.map{meta, assembly, bams, proteinSeq -> []}
+    ch_fasta            = ch_braker_inputs.map{ meta, assembly, bam, proteinSeq -> [meta, assembly] }
+    ch_bam              = ch_braker_inputs.map{ meta, assembly, bam, proteinSeq -> bam }
+    ch_proteins         = ch_braker_inputs.map{ meta, assembly, bam, proteinSeq -> proteinSeq }
+    ch_rnaseq_sets_dirs = []
+    ch_rnaseq_sets_ids  = []
+    ch_hintsfile        = []
 
     BRAKER3(
         ch_fasta,
@@ -400,7 +337,5 @@ workflow PAN_GENE {
         ch_hintsfile
     )
 
-    ch_versions
-    | mix(BRAKER3.out.versions.first())
-    | set { ch_versions }
+    ch_versions = ch_versions.mix(BRAKER3.out.versions.first())
 }
\ No newline at end of file

From 4f2ed8e7d6b996c2ea5e78c40638d6d7545a0f44 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 9 Nov 2023 15:21:59 +1300
Subject: [PATCH 04/59] A bit of reformatting

---
 nextflow.config | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index ec56979..be33522 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -73,9 +73,9 @@ params {
 
     outdir                      = "./results"
 
-    max_cpus                = 12
-    max_memory              = 200.GB
-    max_time                = 1.days
+    max_cpus                    = 12
+    max_memory                  = 200.GB
+    max_time                    = 1.days
 }
 
 includeConfig './conf/modules.config'

From 8b83c61aaf748efa5b93a8d82a68f7143abdb7c5 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 9 Nov 2023 15:58:46 +1300
Subject: [PATCH 05/59] Started implementing liftoff

---
 conf/modules.config           | 13 ++++++++++
 modules/local/liftoff/main.nf | 48 +++++++++++++++++++++++++++++++++++
 modules/nf-core/CHANGELOG.md  |  2 +-
 nextflow.config               | 14 ++++++++++
 4 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 modules/local/liftoff/main.nf

diff --git a/conf/modules.config b/conf/modules.config
index 0cd7bb5..f5dc1c6 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -130,4 +130,17 @@ process {
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
         ]
     }
+}
+
+if(params.liftoff.xref_annotations) {
+    process {
+        withName: LIFTOFF {
+            ext.args = '-exclude_partial',
+            publishDir = [
+                path: { "${params.outdir}/liftoff/${meta.id}" },
+                mode: "copy",
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            ]
+        }
+    }
 }
\ No newline at end of file
diff --git a/modules/local/liftoff/main.nf b/modules/local/liftoff/main.nf
new file mode 100644
index 0000000..cec7bd1
--- /dev/null
+++ b/modules/local/liftoff/main.nf
@@ -0,0 +1,48 @@
+process LIFTOFF {
+    tag "$meta.id"
+    label "process_high"
+
+    container "https://depot.galaxyproject.org/singularity/liftoff:1.6.3--pyhdfd78af_0"
+
+    input:
+    tuple val(meta), path(target_fa)
+    path ref_fa
+    path ref_gff
+    
+    output:
+    tuple val(meta), path("*.liftoff.gff3")         , emit: gff3
+    tuple val(meta), path("unmapped_features.txt")  , emit: unmapped
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    liftoff \\
+    -g $ref_gff \\
+    -p $task.cpus \\
+    $args \\
+    $target_fa \\
+    $ref_fa \\
+    > "${prefix}.liftoff.gff3"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        liftoff: \$(liftoff --version)
+    END_VERSIONS
+    """
+    
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch "${prefix}.liftoff.gff3"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        liftoff: \$(liftoff --version)
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/CHANGELOG.md b/modules/nf-core/CHANGELOG.md
index 2721570..f7e0034 100644
--- a/modules/nf-core/CHANGELOG.md
+++ b/modules/nf-core/CHANGELOG.md
@@ -25,4 +25,4 @@
 1. Added stub
 2. Added author in meta.yml
 
-- Repo: https://github.com/nf-core/modules/tree/4e2cbac1db88f544711e488e552175368ca14588
\ No newline at end of file
+- Repo: https://github.com/nf-core/modules/tree/18cd2206622dc606bbceea533c7823feb2a251db
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index be33522..6980e1c 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -71,6 +71,20 @@ params {
         extra_braker_args       = ""
     }
 
+    liftoff {
+        xref_annotations        = [
+            [
+                "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/Russell_V2a.chromosomes.and.unassiged.and.haplotigs.fsa",
+                "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.gff3"
+            ],
+            [
+                "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_chr_all.fas",
+                "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_GFF3_genes_transposons.fixed.gff3"
+            ]
+        ]
+        // Optional: Set to null if not available
+    }
+
     outdir                      = "./results"
 
     max_cpus                    = 12

From 37ce74edb234d9792d2b59368855c9210fd7e865 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Fri, 10 Nov 2023 13:10:22 +1300
Subject: [PATCH 06/59] Checkpoint before major reshuffle

---
 conf/modules.config   |  2 +-
 nextflow.config       |  6 +--
 workflows/pan_gene.nf | 89 ++++++++++++++++++++++++++++++++++++-------
 3 files changed, 80 insertions(+), 17 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index f5dc1c6..132ffe9 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -135,7 +135,7 @@ process {
 if(params.liftoff.xref_annotations) {
     process {
         withName: LIFTOFF {
-            ext.args = '-exclude_partial',
+            ext.args = '-exclude_partial -copies'
             publishDir = [
                 path: { "${params.outdir}/liftoff/${meta.id}" },
                 mode: "copy",
diff --git a/nextflow.config b/nextflow.config
index 6980e1c..daf1eef 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -50,7 +50,7 @@ params {
         save_trimmed            = false
         // toggling this parameter results in rerun of FASTP and FASTQC_TRIM
 
-        remove_ribo_rna         = false
+        remove_ribo_rna         = true
         save_non_ribo_reads     = false
         ribo_database_manifest  = "${projectDir}/assets/rrna-db-defaults.txt"
     }
@@ -87,8 +87,8 @@ params {
 
     outdir                      = "./results"
 
-    max_cpus                    = 12
-    max_memory                  = 200.GB
+    max_cpus                    = 1
+    max_memory                  = 4.GB
     max_time                    = 1.days
 }
 
diff --git a/workflows/pan_gene.nf b/workflows/pan_gene.nf
index 46dd112..2198765 100644
--- a/workflows/pan_gene.nf
+++ b/workflows/pan_gene.nf
@@ -12,6 +12,8 @@ include { STAR_ALIGN                            } from '../modules/nf-core/star/
 include { SAMTOOLS_CAT                          } from '../modules/nf-core/samtools/cat'
 include { CAT_CAT as CAT_PROTEIN_SEQS           } from '../modules/nf-core/cat/cat'
 include { BRAKER3                               } from '../modules/kherronism/braker3'
+include { GUNZIP as GUNZIP_XREF_FASTA           } from '../modules/nf-core/gunzip'
+include { GUNZIP as GUNZIP_XREF_GFF             } from '../modules/nf-core/gunzip'
 
 include { PERFORM_EDTA_ANNOTATION               } from '../subworkflows/local/perform_edta_annotation'
 include { EXTRACT_SAMPLES                       } from '../subworkflows/local/extract_samples'
@@ -105,8 +107,8 @@ workflow PAN_GENE {
     | set { ch_assemblies_n_te_libs }
 
     REPEATMASKER(
-        ch_assemblies_n_te_libs.map {meta, assembly, teLib -> [meta, assembly]},
-        ch_assemblies_n_te_libs.map {meta, assembly, teLib -> teLib},
+        ch_assemblies_n_te_libs.map { meta, assembly, teLib -> [meta, assembly] },
+        ch_assemblies_n_te_libs.map { meta, assembly, teLib -> teLib },
     )
 
     ch_versions = ch_versions.mix(REPEATMASKER.out.versions.first())
@@ -115,7 +117,7 @@ workflow PAN_GENE {
     def star_ignore_sjdbgtf = true
     STAR_GENOMEGENERATE(
         REPEATMASKER.out.fasta_masked,
-        REPEATMASKER.out.fasta_masked.map{meta, maskedFasta -> [meta, []]},
+        REPEATMASKER.out.fasta_masked.map { meta, maskedFasta -> [meta, []] },
         star_ignore_sjdbgtf
     )
     .index
@@ -131,7 +133,7 @@ workflow PAN_GENE {
     
     EXTRACT_SAMPLES(
         ch_samplesheet_path,
-        Channel.of(params.target_assemblies.collect{tag, fastaPath -> tag.strip()}.join(","))
+        Channel.of(params.target_assemblies.collect { tag, fastaPath -> tag.strip() }.join(","))
     )
     .reads
     | map { meta, fastq ->
@@ -219,9 +221,9 @@ workflow PAN_GENE {
     def seq_platform = false
     def seq_center = false
     STAR_ALIGN(
-        ch_star_inputs.map{meta, fastq, index -> [meta, fastq]},
-        ch_star_inputs.map{meta, fastq, index -> [[id: meta.target_assembly], index]},
-        ch_star_inputs.map{meta, fastq, index -> [[id: meta.target_assembly], []]},
+        ch_star_inputs.map { meta, fastq, index -> [meta, fastq] },
+        ch_star_inputs.map { meta, fastq, index -> [[id: meta.target_assembly], index] },
+        ch_star_inputs.map { meta, fastq, index -> [[id: meta.target_assembly], []] },
         star_ignore_sjdbgtf,
         seq_platform,
         seq_center
@@ -259,7 +261,7 @@ workflow PAN_GENE {
 
     // MODULE: GUNZIP_EXTERNAL_PROTEIN_SEQ
     ch_ext_prot_seqs = Channel.empty()
-    if(params.external_protein_seqs != null) {
+    if(params.external_protein_seqs) {
         ch_ext_prot_seqs = Channel.fromList(params.external_protein_seqs)
     }
     
@@ -287,9 +289,9 @@ workflow PAN_GENE {
 
     // MODULE: CAT_PROTEIN_SEQS
     ch_ext_prot_seqs
-    | map{ meta, filePath -> filePath }
+    | map { meta, filePath -> filePath }
     | collect
-    | map{ fileList -> [[id:"protein_seqs"], fileList] }
+    | map { fileList -> [[id:"protein_seqs"], fileList] }
     | CAT_PROTEIN_SEQS
     
     ch_ext_prot_seqs = CAT_PROTEIN_SEQS.out.file_out
@@ -321,9 +323,9 @@ workflow PAN_GENE {
         | set { ch_braker_inputs }
     }
     
-    ch_fasta            = ch_braker_inputs.map{ meta, assembly, bam, proteinSeq -> [meta, assembly] }
-    ch_bam              = ch_braker_inputs.map{ meta, assembly, bam, proteinSeq -> bam }
-    ch_proteins         = ch_braker_inputs.map{ meta, assembly, bam, proteinSeq -> proteinSeq }
+    ch_fasta            = ch_braker_inputs.map { meta, assembly, bam, proteinSeq -> [meta, assembly] }
+    ch_bam              = ch_braker_inputs.map { meta, assembly, bam, proteinSeq -> bam }
+    ch_proteins         = ch_braker_inputs.map { meta, assembly, bam, proteinSeq -> proteinSeq }
     ch_rnaseq_sets_dirs = []
     ch_rnaseq_sets_ids  = []
     ch_hintsfile        = []
@@ -338,4 +340,65 @@ workflow PAN_GENE {
     )
 
     ch_versions = ch_versions.mix(BRAKER3.out.versions.first())
+
+    // MODULE: GUNZIP_XREF_FASTA
+    // Build the channel from a (possibly empty) list so that the multiMap
+    // outputs .fasta and .gff exist even when no xref annotations are set;
+    // a bare Channel.empty() has neither and breaks the accesses below.
+    Channel.fromList(params.liftoff.xref_annotations ?: [])
+    | multiMap { fasta, gff ->
+        def fastaFile = file(fasta, checkIfExists:true)
+        def meta = [id:fastaFile.getSimpleName()]
+
+        fasta: [meta, fastaFile]
+        gff: [meta, file(gff, checkIfExists:true)]
+    }
+    | set { ch_xref_annotations }
+
+    ch_xref_annotations.fasta
+    | branch { meta, file ->
+        gz: "$file".endsWith(".gz")
+        rest: !"$file".endsWith(".gz")
+    }
+    | set { ch_xref_annotations_branch }
+
+    GUNZIP_XREF_FASTA(
+        ch_xref_annotations_branch.gz
+    )
+    .gunzip
+    | mix(
+        ch_xref_annotations_branch.rest
+    )
+    | set { ch_xref_annotations_fasta }
+
+    // MODULE: GUNZIP_XREF_GFF
+    ch_xref_annotations.gff
+    | branch { meta, file ->
+        gz: "$file".endsWith(".gz")
+        rest: !"$file".endsWith(".gz")
+    }
+    | set { ch_xref_annotations_gff_branch }
+
+    GUNZIP_XREF_GFF(
+        ch_xref_annotations_gff_branch.gz       // [ meta, gff.gz ]; branch outputs are gz/rest
+    )
+    .gunzip
+    | mix(
+        ch_xref_annotations_gff_branch.rest     // [ meta, gff ]; already uncompressed
+    )
+    | set { ch_xref_annotations_gff }
+
+    ch_xref_annotations_fasta
+    | join(
+        ch_xref_annotations_gff
+    )
+    | set { ch_xref_annotations }
+
+    // // MODULE: LIFTOFF
+    // ch_xref_annotations
+    // | combine(
+    //     ch_validated_target_assemblies
+    // )
+    // | map { meta, ref_fasta, refGFF, targetMeta, targetFasta -> [[id:"${targetMeta.id}.from.${meta.id}"], ref_fasta, refGFF, targetFasta] }
+    // | set { ch_liftoff_inputs }
 }
\ No newline at end of file

From d148f1824ee07d653d05239c76496c62a94c16e5 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Fri, 10 Nov 2023 13:59:21 +1300
Subject: [PATCH 07/59] Reformatted local modules

---
 TODO.md                                       |   1 -
 modules/local/edta/edta/main.nf               | 107 +++++++++---------
 modules/local/edta/restore_edta_ids/main.nf   |  97 ++++++++--------
 ...ming_f1b7bce.py => reverse_edta_naming.py} |   2 +-
 modules/local/edta/shorten_edta_ids/main.nf   |  55 +++++----
 ...ta_ids_c97537f.py => shorten_fasta_ids.py} |   9 +-
 modules/local/fasta_validate/main.nf          |  55 +++++----
 modules/local/samplesheet_check/main.nf       |   3 +-
 modules/local/validate_params/main.nf         |   4 -
 subworkflows/local/extract_samples.nf         |   8 +-
 subworkflows/local/fasta_edta.nf              |  43 +++++++
 subworkflows/local/perform_edta_annotation.nf |  48 --------
 12 files changed, 203 insertions(+), 229 deletions(-)
 rename modules/local/edta/restore_edta_ids/resources/usr/bin/{reverse_edta_naming_f1b7bce.py => reverse_edta_naming.py} (98%)
 rename modules/local/edta/shorten_edta_ids/resources/usr/bin/{shorten_fasta_ids_c97537f.py => shorten_fasta_ids.py} (96%)
 create mode 100644 subworkflows/local/fasta_edta.nf
 delete mode 100644 subworkflows/local/perform_edta_annotation.nf

diff --git a/TODO.md b/TODO.md
index d243565..6f0a836 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,4 +1,3 @@
-- [ ] Rename perform_edta_annotation to FASTA_PERFORM_EDTA
 - [ ] Extract subworkflows
 - [ ] STAR ignores softmasking and, thus, should be fed the unmasked genome so that masking and mapping can run in parallel.
 - [ ] Add --eval=reference.gtf
diff --git a/modules/local/edta/edta/main.nf b/modules/local/edta/edta/main.nf
index 99b6811..56fd196 100644
--- a/modules/local/edta/edta/main.nf
+++ b/modules/local/edta/edta/main.nf
@@ -1,7 +1,3 @@
-nextflow.enable.dsl=2
-
-// https://github.com/Plant-Food-Research-Open/assembly_qc
-// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE
 process EDTA {
     tag "$meta.id"
     label "process_high"
@@ -11,64 +7,67 @@ process EDTA {
     containerOptions "-B $TMPDIR:$TMPDIR"
 
     input:
-        tuple val(meta), path(fasta_file)
+    tuple val(meta), path(fasta_file)
     
     output:
-        tuple val(meta), path('*.EDTA.TElib.fa'),       emit: te_lib_fasta
-        tuple val(meta), path('*.EDTA.intact.gff3'),    emit: intact_gff3
-        tuple val(meta), path('*.EDTA.pass.list'),      emit: pass_list
-        tuple val(meta), path('*.EDTA.out'),            emit: out_file
-        tuple val(meta), path('*.EDTA.TEanno.gff3'),    emit: te_anno_gff3
-        path "versions.yml",                            emit: versions
+    tuple val(meta), path('*.EDTA.TElib.fa')    , emit: te_lib_fasta
+    tuple val(meta), path('*.EDTA.intact.gff3') , emit: intact_gff3
+    tuple val(meta), path('*.EDTA.pass.list')   , emit: pass_list
+    tuple val(meta), path('*.EDTA.out')         , emit: out_file
+    tuple val(meta), path('*.EDTA.TEanno.gff3') , emit: te_anno_gff3
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
     
     script:
-        def args = task.ext.args ?: ''
-        def modFileName = "${fasta_file}.mod"
-        """
-        EDTA.pl \\
-        --genome $fasta_file \\
-        --threads $task.cpus \\
-        $args
-        
-        if [ -f "${modFileName}.EDTA.raw/LTR/${modFileName}.pass.list" ]; then
-            cat "${modFileName}.EDTA.raw/LTR/${modFileName}.pass.list" \\
-            > "${modFileName}.EDTA.pass.list"
-        else
-            echo "EDTA PASS LIST IS EMPTY" \\
-            > "${modFileName}.EDTA.pass.list"
-        fi
+    def args = task.ext.args ?: ''
+    def modFileName = "${fasta_file}.mod"
+    """
+    EDTA.pl \\
+    --genome $fasta_file \\
+    --threads $task.cpus \\
+    $args
+    
+    if [ -f "${modFileName}.EDTA.raw/LTR/${modFileName}.pass.list" ]; then
+        cat "${modFileName}.EDTA.raw/LTR/${modFileName}.pass.list" \\
+        > "${modFileName}.EDTA.pass.list"
+    else
+        echo "EDTA PASS LIST IS EMPTY" \\
+        > "${modFileName}.EDTA.pass.list"
+    fi
 
-        if [ -f "${modFileName}.EDTA.anno/${modFileName}.out" ]; then
-            cat "${modFileName}.EDTA.anno/${modFileName}.out" \\
-            > "${modFileName}.EDTA.out"
-        else
-            echo "EDTA DID NOT PRODUCE AN OUT FILE" \\
-            > "${modFileName}.EDTA.out"
-        fi
+    if [ -f "${modFileName}.EDTA.anno/${modFileName}.out" ]; then
+        cat "${modFileName}.EDTA.anno/${modFileName}.out" \\
+        > "${modFileName}.EDTA.out"
+    else
+        echo "EDTA DID NOT PRODUCE AN OUT FILE" \\
+        > "${modFileName}.EDTA.out"
+    fi
 
-        if [ ! -f "${modFileName}.EDTA.TEanno.gff3" ]; then
-            echo "##EDTA DID NOT PRODUCE A TEANNO GFF3" \\
-            > "${modFileName}.EDTA.TEanno.gff3"
-        fi
+    if [ ! -f "${modFileName}.EDTA.TEanno.gff3" ]; then
+        echo "##EDTA DID NOT PRODUCE A TEANNO GFF3" \\
+        > "${modFileName}.EDTA.TEanno.gff3"
+    fi
 
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
-        END_VERSIONS
-        """
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
+    END_VERSIONS
+    """
     
     stub:
-        def modFileName = "${fasta_file}.mod"
-        """
-        touch "${modFileName}.EDTA.TElib.fa"
-        touch "${modFileName}.EDTA.intact.gff3"
-        touch "${modFileName}.EDTA.pass.list"
-        touch "${modFileName}.EDTA.out"
-        touch "${modFileName}.EDTA.TEanno.gff3"
+    def modFileName = "${fasta_file}.mod"
+    """
+    touch "${modFileName}.EDTA.TElib.fa"
+    touch "${modFileName}.EDTA.intact.gff3"
+    touch "${modFileName}.EDTA.pass.list"
+    touch "${modFileName}.EDTA.out"
+    touch "${modFileName}.EDTA.TEanno.gff3"
 
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
-        END_VERSIONS
-        """
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
+    END_VERSIONS
+    """
 }
\ No newline at end of file
diff --git a/modules/local/edta/restore_edta_ids/main.nf b/modules/local/edta/restore_edta_ids/main.nf
index 606848c..4da8a34 100644
--- a/modules/local/edta/restore_edta_ids/main.nf
+++ b/modules/local/edta/restore_edta_ids/main.nf
@@ -1,7 +1,3 @@
-nextflow.enable.dsl=2
-
-// https://github.com/Plant-Food-Research-Open/assembly_qc
-// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE
 process RESTORE_EDTA_IDS {
     tag "$meta.id"
     label "process_single"
@@ -9,58 +5,59 @@ process RESTORE_EDTA_IDS {
     container "docker://gallvp/python3npkgs:v0.4"
 
     input:
-        tuple val(meta), path(te_lib_fa)
-        path(intact_gff3)
-        path(pass_list)
-        path(out_file)
-        path(te_anno_gff3)
-        path(renamed_ids_tsv)
+    tuple val(meta), path(te_lib_fa)
+    path(intact_gff3)
+    path(pass_list)
+    path(out_file)
+    path(te_anno_gff3)
+    path(renamed_ids_tsv)
     
     output:
-        tuple val(meta), path("${meta.id}.EDTA.TElib.fa"),              emit: te_lib_fasta
-        tuple val(meta), path("${meta.id}.EDTA.intact.gff3"),           emit: intact_gff3
-        tuple val(meta), path("${meta.id}.renamed.ids.EDTA.pass.list"), emit: pass_list
-        tuple val(meta), path("${meta.id}.renamed.ids.EDTA.out"),       emit: out_file
-        tuple val(meta), path("${meta.id}.EDTA.TEanno.gff3"),           emit: te_anno_gff3
-        tuple val(meta), path("${meta.id}.renamed.ids.tsv"),            emit: renamed_ids_tsv
-        path "versions.yml",                                            emit: versions
+    tuple val(meta), path("${meta.id}.EDTA.TElib.fa")               , emit: te_lib_fasta
+    tuple val(meta), path("${meta.id}.EDTA.intact.gff3")            , emit: intact_gff3
+    tuple val(meta), path("${meta.id}.renamed.ids.EDTA.pass.list")  , emit: pass_list
+    tuple val(meta), path("${meta.id}.renamed.ids.EDTA.out")        , emit: out_file
+    tuple val(meta), path("${meta.id}.EDTA.TEanno.gff3")            , emit: te_anno_gff3
+    tuple val(meta), path("${meta.id}.renamed.ids.tsv")             , emit: renamed_ids_tsv
+    path "versions.yml"                                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
 
     script:
-        def VERSION = "f1b7bce" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
-        """
-        cat $pass_list > "${meta.id}.renamed.ids.EDTA.pass.list"
-        cat $out_file > "${meta.id}.renamed.ids.EDTA.out"
-        cat $te_lib_fa > "${meta.id}.EDTA.TElib.fa"
-        cat $renamed_ids_tsv > "${meta.id}.renamed.ids.tsv"
-        
-        renamed_ids_head=\$(head -n 1 "$renamed_ids_tsv")
-        
-        if [[ \$renamed_ids_head == "IDs have acceptable length and character. No change required." ]]; then
-            cat $te_anno_gff3 > "${meta.id}.EDTA.TEanno.gff3"
-            cat $intact_gff3 > "${meta.id}.EDTA.intact.gff3"
-        else
-            reverse_edta_naming_f1b7bce.py "$renamed_ids_tsv" "$te_anno_gff3" "$intact_gff3" "$meta"
-        fi
+    """
+    cat $pass_list > "${meta.id}.renamed.ids.EDTA.pass.list"
+    cat $out_file > "${meta.id}.renamed.ids.EDTA.out"
+    cat $te_lib_fa > "${meta.id}.EDTA.TElib.fa"
+    cat $renamed_ids_tsv > "${meta.id}.renamed.ids.tsv"
+    
+    renamed_ids_head=\$(head -n 1 "$renamed_ids_tsv")
+    
+    if [[ \$renamed_ids_head == "IDs have acceptable length and character. No change required." ]]; then
+        cat $te_anno_gff3 > "${meta.id}.EDTA.TEanno.gff3"
+        cat $intact_gff3 > "${meta.id}.EDTA.intact.gff3"
+    else
+        reverse_edta_naming.py "$renamed_ids_tsv" "$te_anno_gff3" "$intact_gff3" "$meta"
+    fi
 
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            reverse_edta_naming: $VERSION
-        END_VERSIONS
-        """
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        reverse_edta_naming: \$(md5sum \$(which reverse_edta_naming.py) | cut -d' ' -f1)
+    END_VERSIONS
+    """
     
     stub:
-        def VERSION = "f1b7bce" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
-        """
-        touch "${meta.id}.EDTA.TElib.fa"
-        touch "${meta.id}.EDTA.intact.gff3"
-        touch "${meta.id}.renamed.ids.EDTA.pass.list"
-        touch "${meta.id}.renamed.ids.EDTA.out"
-        touch "${meta.id}.EDTA.TEanno.gff3"
-        touch "${meta.id}.renamed.ids.tsv"
+    """
+    touch "${meta.id}.EDTA.TElib.fa"
+    touch "${meta.id}.EDTA.intact.gff3"
+    touch "${meta.id}.renamed.ids.EDTA.pass.list"
+    touch "${meta.id}.renamed.ids.EDTA.out"
+    touch "${meta.id}.EDTA.TEanno.gff3"
+    touch "${meta.id}.renamed.ids.tsv"
 
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            reverse_edta_naming: $VERSION
-        END_VERSIONS
-        """
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        reverse_edta_naming: \$(md5sum \$(which reverse_edta_naming.py) | cut -d' ' -f1)
+    END_VERSIONS
+    """
 }
\ No newline at end of file
diff --git a/modules/local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming_f1b7bce.py b/modules/local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming.py
similarity index 98%
rename from modules/local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming_f1b7bce.py
rename to modules/local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming.py
index c047100..7e8522c 100755
--- a/modules/local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming_f1b7bce.py
+++ b/modules/local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import sys
 
diff --git a/modules/local/edta/shorten_edta_ids/main.nf b/modules/local/edta/shorten_edta_ids/main.nf
index 829667b..e216ce4 100644
--- a/modules/local/edta/shorten_edta_ids/main.nf
+++ b/modules/local/edta/shorten_edta_ids/main.nf
@@ -1,7 +1,3 @@
-nextflow.enable.dsl=2
-
-// https://github.com/Plant-Food-Research-Open/assembly_qc
-// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE
 process SHORTEN_EDTA_IDS {
     tag "$meta.id"
     label "process_single"
@@ -9,39 +5,40 @@ process SHORTEN_EDTA_IDS {
     container "docker://gallvp/python3npkgs:v0.4"
     
     input:
-        tuple val(meta), path(fasta_file)
+    tuple val(meta), path(fasta_file)
     
     output:
-        tuple val(meta), path("*.renamed.ids.fa"),  emit: renamed_ids_fasta
-        tuple val(meta), path("*.renamed.ids.tsv"), emit: renamed_ids_tsv
-        path "versions.yml",                        emit: versions
+    tuple val(meta), path("*.renamed.ids.fa")   , emit: renamed_ids_fasta
+    tuple val(meta), path("*.renamed.ids.tsv")  , emit: renamed_ids_tsv
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
     
     script:
-        def VERSION = "c97537f" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
-        """
-        FILE="$fasta_file"
-        output_prefix="\${FILE%.*}"
+    """
+    FILE="$fasta_file"
+    output_prefix="\${FILE%.*}"
 
-        shorten_fasta_ids_c97537f.py "$fasta_file" "\$output_prefix"
+    shorten_fasta_ids.py "$fasta_file" "\$output_prefix"
 
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            shorten_fasta_ids: $VERSION
-        END_VERSIONS
-        """
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        shorten_fasta_ids: \$(md5sum \$(which shorten_fasta_ids.py) | cut -d' ' -f1)
+    END_VERSIONS
+    """
     
     stub:
-        def VERSION = "c97537f" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
-        """
-        FILE="$fasta_file"
-        output_prefix="\${FILE%.*}"
+    """
+    FILE="$fasta_file"
+    output_prefix="\${FILE%.*}"
 
-        touch "\${output_prefix}.renamed.ids.fa"
-        touch "\${output_prefix}.renamed.ids.tsv"
+    touch "\${output_prefix}.renamed.ids.fa"
+    touch "\${output_prefix}.renamed.ids.tsv"
 
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            shorten_fasta_ids: $VERSION
-        END_VERSIONS
-        """
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        shorten_fasta_ids: \$(md5sum \$(which shorten_fasta_ids.py) | cut -d' ' -f1)
+    END_VERSIONS
+    """
 }
\ No newline at end of file
diff --git a/modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids_c97537f.py b/modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids.py
similarity index 96%
rename from modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids_c97537f.py
rename to modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids.py
index e5b62b3..0b6e6d2 100755
--- a/modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids_c97537f.py
+++ b/modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids.py
@@ -1,13 +1,10 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import re
 import sys
 
 from Bio import SeqIO
 
-# https://github.com/Plant-Food-Research-Open/assembly_qc
-# GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE
-
 # The input fasta file path
 fasta_file_path = sys.argv[1]
 
@@ -153,10 +150,10 @@ def fail_if_new_ids_not_valid(ids):
 
     if not do_ids_need_to_change(input_ids):
         print("IDs have acceptable length and character. No change required.")
-        
+
         with open(f"{output_files_prefix}.renamed.ids.tsv", "w") as f:
             f.write("IDs have acceptable length and character. No change required.")
-        
+
         write_fasta_without_comments(fasta_file_path, output_files_prefix)
 
         exit(0)
diff --git a/modules/local/fasta_validate/main.nf b/modules/local/fasta_validate/main.nf
index 7f8370c..7c37c39 100644
--- a/modules/local/fasta_validate/main.nf
+++ b/modules/local/fasta_validate/main.nf
@@ -1,7 +1,3 @@
-nextflow.enable.dsl=2
-
-// https://github.com/Plant-Food-Research-Open/assembly_qc
-// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE
 process FASTA_VALIDATE {
     tag "$meta.id"
     label "process_single"
@@ -9,38 +5,39 @@ process FASTA_VALIDATE {
     container "docker://gallvp/fasta_validator:a6a2ec1_ps"
 
     input:
-        tuple val(meta), path(fasta_file)
+    tuple val(meta), path(fasta_file)
     
     output:
-        tuple val(meta), path("$validFasta"),   emit: valid_fasta
-        path "versions.yml",                    emit: versions
+    tuple val(meta), path("$validFasta")    , emit: valid_fasta
+    path "versions.yml"                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
 
     script:
-        validFasta = (fasta_file.toString() - ~/\.\w+$/) + ".validated.fasta"
-        def VERSION = "a6a2ec1_ps" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
-        """
-        fasta_validate -v $fasta_file >/dev/null
+    validFasta = (fasta_file.toString() - ~/\.\w+$/) + ".validated.fasta"
+    """
+    fasta_validate -v $fasta_file >/dev/null
 
-        # If invalid, the above command will fail and
-        # the NXF error startegy will kick in.
-        
-        cat $fasta_file > $validFasta
+    # If invalid, the above command will fail and
+    # the NXF error startegy will kick in.
+    
+    cat $fasta_file > $validFasta
 
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            fasta_validate: $VERSION
-        END_VERSIONS
-        """
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fasta_validate: \$(md5sum \$(which fasta_validate) | cut -d' ' -f1)
+    END_VERSIONS
+    """
     
     stub:
-        validFasta = (fasta_file.toString() - ~/\.\w+$/) + ".validated.fasta"
-        def VERSION = "a6a2ec1_ps" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
-        """
-        touch $validFasta
+    validFasta = (fasta_file.toString() - ~/\.\w+$/) + ".validated.fasta"
+    """
+    touch $validFasta
 
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            fasta_validate: $VERSION
-        END_VERSIONS
-        """
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fasta_validate: \$(md5sum \$(which fasta_validate) | cut -d' ' -f1)
+    END_VERSIONS
+    """
 }
\ No newline at end of file
diff --git a/modules/local/samplesheet_check/main.nf b/modules/local/samplesheet_check/main.nf
index adb8a92..f0437a6 100644
--- a/modules/local/samplesheet_check/main.nf
+++ b/modules/local/samplesheet_check/main.nf
@@ -1,5 +1,4 @@
-nextflow.enable.dsl=2
-
+// Source:
 // https://github.com/nf-core/rnaseq
 // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
 //
diff --git a/modules/local/validate_params/main.nf b/modules/local/validate_params/main.nf
index f40d2ac..7b5697f 100644
--- a/modules/local/validate_params/main.nf
+++ b/modules/local/validate_params/main.nf
@@ -1,7 +1,3 @@
-nextflow.enable.dsl=2
-
-// https://github.com/Plant-Food-Research-Open/assembly_qc
-// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE
 def validateParams(params) {
     validateFastaTags(params)
     validateTETags(params)
diff --git a/subworkflows/local/extract_samples.nf b/subworkflows/local/extract_samples.nf
index d05de4c..e63bbe0 100644
--- a/subworkflows/local/extract_samples.nf
+++ b/subworkflows/local/extract_samples.nf
@@ -1,17 +1,15 @@
-nextflow.enable.dsl=2
-
+// Source:
 // https://github.com/nf-core/rnaseq
 // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
 //
+// Check input samplesheet and get read channels
+//
 // Changes:
 // Added channel permissible_target_assemblies
 // Changed file name from input_check.nf to extract_samples.nf
 // Removed strandedness
 // Nowing emitting an extra channel 'assemblies' which indicates the
 // assemblies targeted by each read
-//
-// Check input samplesheet and get read channels
-//
 
 include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
 
diff --git a/subworkflows/local/fasta_edta.nf b/subworkflows/local/fasta_edta.nf
new file mode 100644
index 0000000..f55a958
--- /dev/null
+++ b/subworkflows/local/fasta_edta.nf
@@ -0,0 +1,43 @@
+include { SHORTEN_EDTA_IDS  } from '../../modules/local/edta/shorten_edta_ids'
+include { EDTA              } from '../../modules/local/edta/edta'
+include { RESTORE_EDTA_IDS  } from '../../modules/local/edta/restore_edta_ids'
+
+workflow FASTA_EDTA {
+    take:
+    genome_fasta    // channel: [ meta, /path/fasta ]
+    
+    main:
+    SHORTEN_EDTA_IDS(genome_fasta)
+    .renamed_ids_fasta
+    | EDTA
+
+    RESTORE_EDTA_IDS(
+        EDTA.out.te_lib_fasta,
+        EDTA.out.intact_gff3.map { it[1] },
+        EDTA.out.pass_list.map { it[1] },
+        EDTA.out.out_file.map { it[1] },
+        EDTA.out.te_anno_gff3.map { it[1] },
+        SHORTEN_EDTA_IDS.out.renamed_ids_tsv.map { it[1] }
+    )
+
+    Channel.empty()
+    | mix(
+        SHORTEN_EDTA_IDS.out.versions.first()
+    )
+    | mix(
+        EDTA.out.versions.first()
+    )
+    | mix(
+        RESTORE_EDTA_IDS.out.versions.first()
+    )
+    | set { ch_versions }
+    
+    emit:
+    te_lib_fasta    = RESTORE_EDTA_IDS.out.te_lib_fasta     // channel: [ meta, /path/fasta ]
+    intact_gff3     = RESTORE_EDTA_IDS.out.intact_gff3      // channel: [ meta, /path/gff3 ]
+    pass_list       = RESTORE_EDTA_IDS.out.pass_list        // channel: [ meta, /path/pass.list ]
+    out_file        = RESTORE_EDTA_IDS.out.out_file         // channel: [ meta, /path/out.file ]
+    te_anno_gff3    = RESTORE_EDTA_IDS.out.te_anno_gff3     // channel: [ meta, /path/gff3 ]
+    renamed_ids_tsv = RESTORE_EDTA_IDS.out.renamed_ids_tsv  // channel: [ meta, /path/tsv ]
+    versions        = ch_versions                           // channel: [ versions.yml ]
+}
\ No newline at end of file
diff --git a/subworkflows/local/perform_edta_annotation.nf b/subworkflows/local/perform_edta_annotation.nf
deleted file mode 100644
index d362934..0000000
--- a/subworkflows/local/perform_edta_annotation.nf
+++ /dev/null
@@ -1,48 +0,0 @@
-nextflow.enable.dsl=2
-
-include { SHORTEN_EDTA_IDS  } from '../../modules/local/edta/shorten_edta_ids'
-include { EDTA              } from '../../modules/local/edta/edta'
-include { RESTORE_EDTA_IDS  } from '../../modules/local/edta/restore_edta_ids'
-
-// https://github.com/Plant-Food-Research-Open/assembly_qc
-// GPL-3.0: https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE
-workflow PERFORM_EDTA_ANNOTATION {
-    take:
-        genome_fasta    // [meta, /path/to/genome/fasta]
-    
-    main:
-        
-        SHORTEN_EDTA_IDS(genome_fasta)
-        .renamed_ids_fasta
-        | EDTA
-
-        RESTORE_EDTA_IDS(
-            EDTA.out.te_lib_fasta,
-            EDTA.out.intact_gff3.map { it[1] },
-            EDTA.out.pass_list.map { it[1] },
-            EDTA.out.out_file.map { it[1] },
-            EDTA.out.te_anno_gff3.map { it[1] },
-            SHORTEN_EDTA_IDS.out.renamed_ids_tsv.map { it[1] }
-        )
-
-        Channel.empty()
-        | mix(
-            SHORTEN_EDTA_IDS.out.versions.first()
-        )
-        | mix(
-            EDTA.out.versions.first()
-        )
-        | mix(
-            RESTORE_EDTA_IDS.out.versions.first()
-        )
-        | set { ch_versions }
-    
-    emit:
-        te_lib_fasta    = RESTORE_EDTA_IDS.out.te_lib_fasta
-        intact_gff3     = RESTORE_EDTA_IDS.out.intact_gff3
-        pass_list       = RESTORE_EDTA_IDS.out.pass_list
-        out_file        = RESTORE_EDTA_IDS.out.out_file
-        te_anno_gff3    = RESTORE_EDTA_IDS.out.te_anno_gff3
-        renamed_ids_tsv = RESTORE_EDTA_IDS.out.renamed_ids_tsv
-        versions        = ch_versions
-}
\ No newline at end of file

From e63e22fea1ec2c240c5299869ed30b058a781827 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Fri, 10 Nov 2023 14:01:48 +1300
Subject: [PATCH 08/59] Now using galaxy containers

---
 TODO.md                         | 3 +--
 modules/local/edta/edta/main.nf | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/TODO.md b/TODO.md
index 6f0a836..41d8b8c 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,4 +1,3 @@
 - [ ] Extract subworkflows
 - [ ] STAR ignores softmasking and, thus, should be fed the unmasked genome so that masking and mapping can run in parallel.
-- [ ] Add --eval=reference.gtf
-- [ ] Replace quay containers with galaxyproject cache containers.
\ No newline at end of file
+- [ ] Add --eval=reference.gtf
\ No newline at end of file
diff --git a/modules/local/edta/edta/main.nf b/modules/local/edta/edta/main.nf
index 56fd196..2e6d759 100644
--- a/modules/local/edta/edta/main.nf
+++ b/modules/local/edta/edta/main.nf
@@ -3,7 +3,7 @@ process EDTA {
     label "process_high"
     label "process_week_long"
     
-    container 'quay.io/biocontainers/edta:2.1.0--hdfd78af_1'
+    container 'https://depot.galaxyproject.org/singularity/edta:2.1.0--hdfd78af_1'
     containerOptions "-B $TMPDIR:$TMPDIR"
 
     input:

From 19783086736688fe3349c264ac1b83f8f9e67e71 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Fri, 10 Nov 2023 15:44:53 +1300
Subject: [PATCH 09/59] Extracted some subworkflows

---
 subworkflows/local/align_rnaseq.nf      |  70 +++++++
 subworkflows/local/fasta_edta.nf        |  14 +-
 subworkflows/local/prepare_assembly.nf  | 101 +++++++++
 subworkflows/local/preprocess_rnaseq.nf |  95 +++++++++
 workflows/pan_gene.nf                   | 261 ++++--------------------
 5 files changed, 317 insertions(+), 224 deletions(-)
 create mode 100644 subworkflows/local/align_rnaseq.nf
 create mode 100644 subworkflows/local/prepare_assembly.nf
 create mode 100644 subworkflows/local/preprocess_rnaseq.nf

diff --git a/subworkflows/local/align_rnaseq.nf b/subworkflows/local/align_rnaseq.nf
new file mode 100644
index 0000000..be7f026
--- /dev/null
+++ b/subworkflows/local/align_rnaseq.nf
@@ -0,0 +1,70 @@
+include { STAR_ALIGN    } from '../../modules/nf-core/star/align'
+include { SAMTOOLS_CAT  } from '../../modules/nf-core/samtools/cat'
+
+workflow ALIGN_RNASEQ {
+    take:
+    reads_target    // channel: [ meta, assembly_id ]
+    trim_reads      // channel: [ meta, [ fq ] ]
+    assembly_index  // channel: [ meta2, star_index ]
+    
+    main:
+    // MODULE: STAR_ALIGN
+    reads_target
+    | combine(trim_reads, by:0)
+    | map { meta, assembly, fastq ->
+        [assembly, [id:"${meta.id}.on.${assembly}", single_end:meta.single_end, target_assembly:assembly], fastq]
+    }
+    | combine(
+        assembly_index.map { meta, index -> [meta.id, index] },
+        by:0
+    )
+    | map { assembly, meta, fastq, index -> [meta, fastq, index] }
+    | set { ch_star_inputs }
+
+    def seq_platform = false
+    def seq_center = false
+    STAR_ALIGN(
+        ch_star_inputs.map { meta, fastq, index -> [meta, fastq] },
+        ch_star_inputs.map { meta, fastq, index -> [[id: meta.target_assembly], index] },
+        ch_star_inputs.map { meta, fastq, index -> [[id: meta.target_assembly], []] },
+        star_ignore_sjdbgtf,
+        seq_platform,
+        seq_center
+    )
+    .bam_sorted
+    | set { ch_star_bam }
+
+    // MODULE: SAMTOOLS_CAT
+    ch_star_bam
+    | map { meta, bam ->
+        [
+            [id: meta.target_assembly],
+            bam instanceof List ? bam.find {it =~ /Aligned/} : bam
+        ]
+    }
+    | groupTuple
+    | branch { meta, bamList ->
+        bams: bamList.size() > 1
+        bam: bamList.size() <= 1
+    }
+    | set { ch_star_bam_branch }
+
+    SAMTOOLS_CAT(
+        ch_star_bam_branch.bams
+    )
+    .bam
+    | map { meta, bam -> [meta, [bam]] }
+    | mix(
+        ch_star_bam_branch.bam
+    )
+    | set { ch_samtools_bam }
+
+    Channel.empty()
+    | mix(STAR_ALIGN.out.versions.first())
+    | mix(SAMTOOLS_CAT.out.versions.first())
+    | set { ch_versions }
+    
+    emit:
+    bam         = ch_samtools_bam   // channel: [ [ id, single_end, target_assembly ], [ bam ] ]
+    versions    = ch_versions       // channel: [ versions.yml ]
+}
\ No newline at end of file
diff --git a/subworkflows/local/fasta_edta.nf b/subworkflows/local/fasta_edta.nf
index f55a958..c47e557 100644
--- a/subworkflows/local/fasta_edta.nf
+++ b/subworkflows/local/fasta_edta.nf
@@ -4,7 +4,7 @@ include { RESTORE_EDTA_IDS  } from '../../modules/local/edta/restore_edta_ids'
 
 workflow FASTA_EDTA {
     take:
-    genome_fasta    // channel: [ meta, /path/fasta ]
+    genome_fasta    // channel: [ meta, fasta ]
     
     main:
     SHORTEN_EDTA_IDS(genome_fasta)
@@ -33,11 +33,11 @@ workflow FASTA_EDTA {
     | set { ch_versions }
     
     emit:
-    te_lib_fasta    = RESTORE_EDTA_IDS.out.te_lib_fasta     // channel: [ meta, /path/fasta ]
-    intact_gff3     = RESTORE_EDTA_IDS.out.intact_gff3      // channel: [ meta, /path/gff3 ]
-    pass_list       = RESTORE_EDTA_IDS.out.pass_list        // channel: [ meta, /path/pass.list ]
-    out_file        = RESTORE_EDTA_IDS.out.out_file         // channel: [ meta, /path/out.file ]
-    te_anno_gff3    = RESTORE_EDTA_IDS.out.te_anno_gff3     // channel: [ meta, /path/gff3 ]
-    renamed_ids_tsv = RESTORE_EDTA_IDS.out.renamed_ids_tsv  // channel: [ meta, /path/tsv ]
+    te_lib_fasta    = RESTORE_EDTA_IDS.out.te_lib_fasta     // channel: [ meta, fasta ]
+    intact_gff3     = RESTORE_EDTA_IDS.out.intact_gff3      // channel: [ meta, gff3 ]
+    pass_list       = RESTORE_EDTA_IDS.out.pass_list        // channel: [ meta, pass.list ]
+    out_file        = RESTORE_EDTA_IDS.out.out_file         // channel: [ meta, out.file ]
+    te_anno_gff3    = RESTORE_EDTA_IDS.out.te_anno_gff3     // channel: [ meta, gff3 ]
+    renamed_ids_tsv = RESTORE_EDTA_IDS.out.renamed_ids_tsv  // channel: [ meta, tsv ]
     versions        = ch_versions                           // channel: [ versions.yml ]
 }
\ No newline at end of file
diff --git a/subworkflows/local/prepare_assembly.nf b/subworkflows/local/prepare_assembly.nf
new file mode 100644
index 0000000..1cecb72
--- /dev/null
+++ b/subworkflows/local/prepare_assembly.nf
@@ -0,0 +1,101 @@
+include { GUNZIP as GUNZIP_TARGET_ASSEMBLY      } from '../../modules/nf-core/gunzip'
+include { GUNZIP as GUNZIP_TE_LIBRARY           } from '../../modules/nf-core/gunzip'
+include { FASTA_VALIDATE                        } from '../../modules/local/fasta_validate'
+include { REPEATMASKER                          } from '../../modules/kherronism/repeatmasker'
+include { STAR_GENOMEGENERATE                   } from '../../modules/nf-core/star/genomegenerate'
+
+include { FASTA_EDTA                            } from '../subworkflows/local/fasta_edta'
+
+workflow PREPARE_ASSEMBLY {
+    take:
+    target_assembly     // channel: [ meta, fasta ]
+    te_library          // channel: [ meta, fasta ]
+
+    main:
+    // MODULE: GUNZIP_TARGET_ASSEMBLY
+    target_assembly
+    | branch { meta, file ->
+        gz: "$file".endsWith(".gz")
+        rest: !"$file".endsWith(".gz")
+    }
+    | set { tech_target_assembly_branch }
+
+    GUNZIP_TARGET_ASSEMBLY(
+        tech_target_assembly_branch.gz
+    )
+    .gunzip
+    | mix(
+        tech_target_assembly_branch.rest
+    )
+    | set { ch_gunzip_target_assembly }
+
+    // MODULE: FASTA_VALIDATE
+    FASTA_VALIDATE(ch_gunzip_target_assembly)
+    .valid_fasta
+    | set { ch_validated_target_assembly }
+
+    // MODULE: GUNZIP_TE_LIBRARY
+    te_library
+    | branch { meta, file ->
+        gz: "$file".endsWith(".gz")
+        rest: !"$file".endsWith(".gz")
+    }
+    | set { ch_te_library_branch }
+
+    GUNZIP_TE_LIBRARY(
+        ch_te_library_branch.gz
+    )
+    .gunzip
+    | mix(
+        ch_te_library_branch.rest
+    )
+    | set { ch_gunzip_te_library }
+
+    // SUBWORKFLOW: FASTA_EDTA
+    ch_validated_target_assembly
+    | join(
+        ch_gunzip_te_library, remainder: true
+    )
+    | filter { meta, assembly, teLib ->
+        teLib == null
+    }
+    | map { meta, assembly, teLib -> [meta, assembly] }
+    | FASTA_EDTA
+    
+    // MODULE: REPEATMASKER
+    ch_validated_target_assembly
+    | join(
+        FASTA_EDTA.out.te_lib_fasta.mix(ch_gunzip_te_library)
+    )
+    | set { ch_assembly_n_te_lib }
+
+    REPEATMASKER(
+        ch_assembly_n_te_lib.map { meta, assembly, teLib -> [meta, assembly] },
+        ch_assembly_n_te_lib.map { meta, assembly, teLib -> teLib },
+    )
+
+    // MODULE: STAR_GENOMEGENERATE
+    def star_ignore_sjdbgtf = true
+    STAR_GENOMEGENERATE(
+        REPEATMASKER.out.fasta_masked,
+        REPEATMASKER.out.fasta_masked.map { meta, maskedFasta -> [meta, []] },
+        star_ignore_sjdbgtf
+    )
+    .index
+    | set { ch_assembly_index }
+
+    Channel.empty()
+    | mix(FASTA_VALIDATE.out.versions.first())
+    | mix(GUNZIP_TE_LIBRARY.out.versions.first())
+    | mix(FASTA_EDTA.out.versions)
+    | mix(REPEATMASKER.out.versions.first())
+    | mix(STAR_GENOMEGENERATE.out.versions.first())
+    | mix(GUNZIP_TARGET_ASSEMBLY.out.versions.first())
+    | set { ch_versions }
+    
+    emit:
+    target_assemby                                          // channel: [ meta, fasta ]
+    masked_target_assembly  = REPEATMASKER.out.fasta_masked // channel: [ meta, fasta ]
+    target_assemby_index    = ch_assembly_index             // channel: [ meta, star_index ]
+    versions                = ch_versions                   // channel: [ versions.yml ]
+}
\ No newline at end of file
diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf
new file mode 100644
index 0000000..8223910
--- /dev/null
+++ b/subworkflows/local/preprocess_rnaseq.nf
@@ -0,0 +1,95 @@
+include { CAT_FASTQ                     } from '../../modules/nf-core/cat/fastq'
+include { SORTMERNA                     } from '../../modules/nf-core/sortmerna'
+include { EXTRACT_SAMPLES               } from '../../subworkflows/local/extract_samples'
+include { FASTQ_FASTQC_UMITOOLS_FASTP   } from '../../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
+
+workflow PREPROCESS_RNASEQ {
+    take:
+    samplesheet                     // path: csv
+    permissible_target_assemblies   // val: assembly_a,assembly_b
+    skip_fastqc                     // val: true|false
+    skip_fastp                      // val: true|false
+    save_trimmed                    // val: true|false
+    min_trimmed_reads               // val: Integer
+    remove_ribo_rna                 // val: true|false
+    sortmerna_fastas                // channel: [ [ fasta ] ]
+    
+    main:
+    ch_versions = Channel.empty()
+    // SUBWORKFLOW: EXTRACT_SAMPLES
+    EXTRACT_SAMPLES(
+        samplesheet,
+        ch_permissible_target_assemblies
+    )
+    .reads
+    | map { meta, fastq ->
+        groupID = meta.id - ~/_T\d+/
+        [ meta + [id: groupID], fastq ]
+    }
+    | groupTuple()
+    | branch { meta, fastq ->
+        single  : fastq.size() == 1
+            return [ meta, fastq.flatten() ]
+        multiple: fastq.size() > 1
+            return [ meta, fastq.flatten() ]
+    }
+    | set { ch_fastq }
+
+    EXTRACT_SAMPLES.out.assemblies
+    | map { meta, assembly ->
+        groupID = meta.id - ~/_T\d+/
+        [ meta + [id: groupID], assembly ]
+    }
+    | unique
+    | set { ch_reads_target }
+
+    // MODULES: CAT_FASTQ
+    CAT_FASTQ (
+        ch_fastq.multiple
+    )
+    .reads
+    | mix(ch_fastq.single)
+    | set { ch_cat_fastq }
+
+    // SUBWORKFLOW: FASTQ_FASTQC_UMITOOLS_FASTP
+    def with_umi            = false
+    def skip_umi_extract    = true
+    def umi_discard_read    = false
+    FASTQ_FASTQC_UMITOOLS_FASTP (
+        ch_cat_fastq,
+        skip_fastqc,
+        with_umi,
+        skip_umi_extract,
+        umi_discard_read,
+        skip_fastp,
+        [],
+        save_trimmed,
+        save_trimmed,
+        min_trimmed_reads
+    )
+    .reads
+    | set { ch_trim_reads }
+
+    // MODULE: SORTMERNA
+    if (remove_ribo_rna) {
+        SORTMERNA (
+            ch_trim_reads,
+            sortmerna_fastas
+        )
+        .reads
+        | set { ch_sortmerna_reads }
+
+        ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
+    }
+
+    ch_versions
+    | mix(EXTRACT_SAMPLES.out.versions)
+    | mix(CAT_FASTQ.out.versions.first())
+    | mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
+    | set { ch_versions }
+
+    emit:
+    trim_reads      = remove_ribo_rna ? ch_sortmerna_reads : ch_trim_reads  // channel: [ meta, [ fq ] ]
+    reads_target    = ch_reads_target                                       // channel: [ meta, assembly_id ]
+    versions        = ch_versions                                           // channel: [ versions.yml ]
+}
\ No newline at end of file
diff --git a/workflows/pan_gene.nf b/workflows/pan_gene.nf
index 2198765..a0dc6e8 100644
--- a/workflows/pan_gene.nf
+++ b/workflows/pan_gene.nf
@@ -1,26 +1,17 @@
 nextflow.enable.dsl=2
 
-include { GUNZIP as GUNZIP_TARGET_ASSEMBLY      } from '../modules/nf-core/gunzip'
-include { GUNZIP as GUNZIP_TE_LIBRARY           } from '../modules/nf-core/gunzip'
+
 include { GUNZIP as GUNZIP_EXTERNAL_PROTEIN_SEQ } from '../modules/nf-core/gunzip'
-include { FASTA_VALIDATE                        } from '../modules/local/fasta_validate'
-include { REPEATMASKER                          } from '../modules/kherronism/repeatmasker'
-include { STAR_GENOMEGENERATE                   } from '../modules/nf-core/star/genomegenerate'
-include { CAT_FASTQ                             } from '../modules/nf-core/cat/fastq'
-include { SORTMERNA                             } from '../modules/nf-core/sortmerna'
-include { STAR_ALIGN                            } from '../modules/nf-core/star/align'
-include { SAMTOOLS_CAT                          } from '../modules/nf-core/samtools/cat'
 include { CAT_CAT as CAT_PROTEIN_SEQS           } from '../modules/nf-core/cat/cat'
 include { BRAKER3                               } from '../modules/kherronism/braker3'
 include { GUNZIP as GUNZIP_XREF_FASTA           } from '../modules/nf-core/gunzip'
 include { GUNZIP as GUNZIP_XREF_GFF             } from '../modules/nf-core/gunzip'
-
-include { PERFORM_EDTA_ANNOTATION               } from '../subworkflows/local/perform_edta_annotation'
-include { EXTRACT_SAMPLES                       } from '../subworkflows/local/extract_samples'
-include { FASTQ_FASTQC_UMITOOLS_FASTP           } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
-
 include { validateParams                        } from '../modules/local/validate_params'
 
+include { PREPARE_ASSEMBLY                      } from '../subworkflows/local/prepare_assembly'
+include { PREPROCESS_RNASEQ                     } from '../subworkflows/local/preprocess_rnaseq'
+include { ALIGN_RNASEQ                          } from '../subworkflows/local/align_rnaseq'
+
 validateParams(params)
 
 // Additional validation
@@ -34,230 +25,66 @@ workflow PAN_GENE {
 
     // Versions
     ch_versions = Channel.empty()
-    
-    // MODULE: GUNZIP_TARGET_ASSEMBLY
+
+    // Input channels
     Channel.fromList(params.target_assemblies)
     | map { tag, filePath ->
         [[id:tag], file(filePath, checkIfExists: true)]
     }
-    | branch { meta, file ->
-        gz: "$file".endsWith(".gz")
-        rest: !"$file".endsWith(".gz")
-    }
-    | set { ch_target_assemblies }
-
-    GUNZIP_TARGET_ASSEMBLY(
-        ch_target_assemblies.gz
-    )
-    .gunzip
-    | mix(
-        ch_target_assemblies.rest
-    )
-    | set { ch_gunzip_target_assemblies }
-
-    ch_versions = ch_versions.mix(GUNZIP_TARGET_ASSEMBLY.out.versions.first())
-
-    // MODULE: FASTA_VALIDATE
-    FASTA_VALIDATE(ch_gunzip_target_assemblies)
-    .valid_fasta
-    | set { ch_validated_target_assemblies }
-
-    ch_versions = ch_versions.mix(FASTA_VALIDATE.out.versions.first())
+    | set { ch_target_assembly }
 
-    // MODULE: GUNZIP_TE_LIBRARY
     Channel.fromList(params.te_libraries)
     | map { tag, filePath ->
         [[id:tag], file(filePath, checkIfExists: true)]
     }
-    | branch { meta, file ->
-        gz: "$file".endsWith(".gz")
-        rest: !"$file".endsWith(".gz")
-    }
-    | set { ch_te_libraries }
-
-    GUNZIP_TE_LIBRARY(
-        ch_te_libraries.gz
-    )
-    .gunzip
-    | mix(
-        ch_te_libraries.rest
-    )
-    | set { ch_gunzip_te_libraries }
-
-    ch_versions = ch_versions.mix(GUNZIP_TE_LIBRARY.out.versions.first())
+    | set { ch_te_library }
 
-    // SUBWORKFLOW: PERFORM_EDTA_ANNOTATION
-    ch_validated_target_assemblies
-    | join(
-        ch_gunzip_te_libraries, remainder: true
-    )
-    | filter { meta, assembly, teLib ->
-        teLib == null
+    ch_samplesheet = Channel.empty()
+    if(params.samplesheet) {
+        ch_samplesheet = Channel.fromPath(params.samplesheet)
     }
-    | map {meta, assembly, teLib -> [meta, assembly]}
-    | PERFORM_EDTA_ANNOTATION
-
-    ch_versions = ch_versions.mix(PERFORM_EDTA_ANNOTATION.out.versions)
     
-    // MODULE: REPEATMASKER
-    ch_validated_target_assemblies
-    | join(
-        PERFORM_EDTA_ANNOTATION.out.te_lib_fasta.mix(ch_gunzip_te_libraries)
-    )
-    | set { ch_assemblies_n_te_libs }
-
-    REPEATMASKER(
-        ch_assemblies_n_te_libs.map { meta, assembly, teLib -> [meta, assembly] },
-        ch_assemblies_n_te_libs.map { meta, assembly, teLib -> teLib },
-    )
+    Channel.of(params.target_assemblies.collect { tag, fastaPath -> tag.strip() }.join(","))
+    | set { ch_permissible_target_assemblies }
 
-    ch_versions = ch_versions.mix(REPEATMASKER.out.versions.first())
+    Channel.from(ch_ribo_db.readLines())
+    | map { row -> file(row, checkIfExists: true) }
+    | collect
+    | set { ch_sortmerna_fastas }
 
-    // MODULE: STAR_GENOMEGENERATE
-    def star_ignore_sjdbgtf = true
-    STAR_GENOMEGENERATE(
-        REPEATMASKER.out.fasta_masked,
-        REPEATMASKER.out.fasta_masked.map { meta, maskedFasta -> [meta, []] },
-        star_ignore_sjdbgtf
+    // SUBWORKFLOW: PREPARE_ASSEMBLY
+    PREPARE_ASSEMBLY(
+        ch_target_assembly,
+        ch_te_library
     )
-    .index
-    | set { ch_assembly_index }
 
-    ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions.first())
+    ch_valid_target_assembly    = PREPARE_ASSEMBLY.out.target_assemby
+    ch_masked_target_assembly   = PREPARE_ASSEMBLY.out.masked_target_assembly
+    ch_target_assemby_index     = PREPARE_ASSEMBLY.out.target_assemby_index
+    ch_versions                 = ch_versions.mix(PREPARE_ASSEMBLY.out.versions)
 
-    // SUBWORKFLOW: EXTRACT_SAMPLES
-    ch_samplesheet_path = Channel.empty()
-    if(params.samplesheet != null) {
-        ch_samplesheet_path = Channel.fromPath(params.samplesheet)
-    }
-    
-    EXTRACT_SAMPLES(
-        ch_samplesheet_path,
-        Channel.of(params.target_assemblies.collect { tag, fastaPath -> tag.strip() }.join(","))
+    // SUBWORKFLOW: PREPROCESS_RNASEQ
+    PREPROCESS_RNASEQ(
+        ch_samplesheet,
+        ch_permissible_target_assemblies,
+        params.skip_fastqc,
+        params.skip_fastp,
+        params.save_trimmed,
+        params.min_trimmed_reads,
+        params.remove_ribo_rna,
+        ch_sortmerna_fastas
     )
-    .reads
-    | map { meta, fastq ->
-        groupID = meta.id - ~/_T\d+/
-        [ meta + [id: groupID], fastq ]
-    }
-    | groupTuple()
-    | branch { meta, fastq ->
-        single  : fastq.size() == 1
-            return [ meta, fastq.flatten() ]
-        multiple: fastq.size() > 1
-            return [ meta, fastq.flatten() ]
-    }
-    | set { ch_fastq }
 
-    ch_read_target_assemblies = EXTRACT_SAMPLES.out.assemblies
-    ch_versions = ch_versions.mix(EXTRACT_SAMPLES.out.versions)
+    ch_trim_reads               = PREPROCESS_RNASEQ.out.trim_reads
+    ch_reads_target             = PREPROCESS_RNASEQ.out.reads_target
+    ch_versions                 = ch_versions.mix(PREPROCESS_RNASEQ.out.versions)
 
-    // MODULES: CAT_FASTQ
-    CAT_FASTQ (
-        ch_fastq.multiple
-    )
-    .reads
-    | mix(ch_fastq.single)
-    | set { ch_cat_fastq }
-    
-    ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first())
-
-    // SUBWORKFLOW: FASTQ_FASTQC_UMITOOLS_FASTP
-    def with_umi            = false
-    def skip_umi_extract    = true
-    def umi_discard_read    = false
-    FASTQ_FASTQC_UMITOOLS_FASTP (
-        ch_cat_fastq,
-        params.sample_prep.skip_fastqc,
-        with_umi,
-        skip_umi_extract,
-        umi_discard_read,
-        params.sample_prep.skip_fastp,
-        [],
-        params.sample_prep.save_trimmed,
-        params.sample_prep.save_trimmed,
-        params.sample_prep.min_trimmed_reads
+    // SUBWORKFLOW: STAR_ALIGN
+    ALIGN_RNASEQ(
+        ch_reads_target,
+        ch_trim_reads,
+        ch_target_assemby_index
     )
-    .reads
-    | set { ch_trim_reads }
-
-    ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
-
-    // MODULE: SORTMERNA
-    if (params.sample_prep.remove_ribo_rna) {
-        Channel.from(ch_ribo_db.readLines())
-        | map { row -> file(row, checkIfExists: true) }
-        | collect
-        | set { ch_sortmerna_fastas }
-
-        SORTMERNA (
-            ch_trim_reads,
-            ch_sortmerna_fastas
-        )
-        .reads
-        | set { ch_trim_reads }
-
-        ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
-    }
-
-    // MODULE: STAR_ALIGN
-    ch_read_target_assemblies
-    | map { meta, assembly ->
-        groupID = meta.id - ~/_T\d+/
-        [ meta + [id: groupID], assembly ]
-    }
-    | unique
-    | combine(ch_trim_reads, by:0)
-    | map { meta, assembly, fastq ->
-        [assembly, [id:"${meta.id}.on.${assembly}", single_end:meta.single_end, target_assembly:assembly], fastq]
-    }
-    | combine(
-        ch_assembly_index.map { meta, index -> [meta.id, index] },
-        by:0
-    )
-    | map { assembly, meta, fastq, index -> [meta, fastq, index] }
-    | set { ch_star_inputs }
-
-    def seq_platform = false
-    def seq_center = false
-    STAR_ALIGN(
-        ch_star_inputs.map { meta, fastq, index -> [meta, fastq] },
-        ch_star_inputs.map { meta, fastq, index -> [[id: meta.target_assembly], index] },
-        ch_star_inputs.map { meta, fastq, index -> [[id: meta.target_assembly], []] },
-        star_ignore_sjdbgtf,
-        seq_platform,
-        seq_center
-    )
-    .bam_sorted
-    | set { ch_star_bam }
-
-    ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first())
-
-    // MODULE: SAMTOOLS_CAT
-    ch_star_bam
-    | map { meta, bam ->
-        [
-            [id: meta.target_assembly],
-            bam instanceof List ? bam.find {it =~ /Aligned/} : bam
-        ]
-    }
-    | groupTuple
-    | branch { meta, bamList ->
-        bams: bamList.size() > 1
-        bam: bamList.size() <= 1
-    }
-    | set { ch_star_bam_branch }
-
-    SAMTOOLS_CAT(
-        ch_star_bam_branch.bams
-    )
-    .bam.map { meta, bam -> [meta, [bam]] }
-    | mix(
-        ch_star_bam_branch.bam
-    )
-    | set { ch_samtools_bam }
-
-    ch_versions = ch_versions.mix(SAMTOOLS_CAT.out.versions.first())
 
     // MODULE: GUNZIP_EXTERNAL_PROTEIN_SEQ
     ch_ext_prot_seqs = Channel.empty()

From 10a015815b4839823af3571a66bfb5e9aa95b631 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Mon, 13 Nov 2023 12:20:37 +1300
Subject: [PATCH 10/59] Extracted a few subworkflows

---
 TODO.md                                     |   2 -
 conf/modules.config                         |  28 +-
 modules/local/edta/shorten_edta_ids/main.nf |  14 -
 modules/local/validate_params/main.nf       |   6 +
 nextflow.config                             |  90 +++---
 subworkflows/local/align_rnaseq.nf          |   5 +-
 subworkflows/local/prepare_assembly.nf      |   8 +-
 subworkflows/local/prepare_ext_prots.nf     |  40 +++
 subworkflows/local/preprocess_rnaseq.nf     |   2 +-
 workflows/pan_gene.nf                       | 297 +++++++++-----------
 10 files changed, 235 insertions(+), 257 deletions(-)
 create mode 100644 subworkflows/local/prepare_ext_prots.nf

diff --git a/TODO.md b/TODO.md
index 41d8b8c..6e1e66c 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,3 +1 @@
-- [ ] Extract subworkflows
-- [ ] STAR ignores softmasking and, thus, should be fed the unmasked genome so that masking and mapping can run in parallel.
 - [ ] Add --eval=reference.gtf
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
index 132ffe9..6683fcc 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -1,7 +1,7 @@
 process {
     withName: 'EDTA' {
         ext.args = [
-            params.edta.is_sensitive ? "--sensitive 1" :  "--sensitive 0",
+            params.edta_is_sensitive ? "--sensitive 1" :  "--sensitive 0",
             "--anno 0",
             "--force 1"
         ].join(' ').trim()
@@ -12,7 +12,7 @@ process {
             path: { "${params.outdir}/edta/${meta.id}" },
             mode: "copy",
             saveAs: { filename -> filename.equals("versions.yml") ? null : filename },
-            enabled: params.edta.save_outputs
+            enabled: params.edta_save_outputs
         ]
     }
 
@@ -26,12 +26,12 @@ process {
             path: { "${params.outdir}/repeatmasker" },
             mode: "copy",
             saveAs: { filename -> filename.equals("versions.yml") ? null : filename },
-            enabled: params.repeatmasker.save_outputs
+            enabled: params.repeatmasker_save_outputs
         ]
     }
 }
 
-if(!params.sample_prep.skip_fastqc) {
+if(!params.skip_fastqc) {
     process {
         withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_RAW' {
         ext.args   = '--quiet'
@@ -48,10 +48,10 @@ if(!params.sample_prep.skip_fastqc) {
     }
 }
 
-if(!params.sample_prep.skip_fastp) {
+if(!params.skip_fastp) {
     process {
         withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTP' {
-            ext.args   = params.sample_prep.extra_fastp_args ?: ''
+            ext.args   = params.extra_fastp_args ?: ''
             publishDir = [
                 [
                     path: { "${params.outdir}/fastp/html" },
@@ -72,14 +72,14 @@ if(!params.sample_prep.skip_fastp) {
                     path: { "${params.outdir}/fastp" },
                     mode:  "copy",
                     pattern: "*.fastq.gz",
-                    enabled: params.sample_prep.save_trimmed
+                    enabled: params.save_trimmed
                 ]
             ]
         }
     }
 }
 
-if (params.sample_prep.remove_ribo_rna) {
+if (params.remove_ribo_rna) {
     process {
         withName: 'SORTMERNA' {
             ext.args   = '--num_alignments 1 -v'
@@ -93,7 +93,7 @@ if (params.sample_prep.remove_ribo_rna) {
                     path: { "${params.outdir}/sortmerna" },
                     mode: "copy",
                     pattern: "*.fastq.gz",
-                    enabled: params.sample_prep.save_non_ribo_reads
+                    enabled: params.save_non_ribo_reads
                 ]
             ]
         }
@@ -106,22 +106,22 @@ process {
             "--outSAMstrandField intronMotif",
             "--outSAMtype BAM SortedByCoordinate",
             "--readFilesCommand gunzip -c",
-            "--alignIntronMax ${params.star_align.max_intron_length}",
-            params.star_align.extra_star_align_args ? params.star_align.extra_star_align_args.split("\\s(?=--)") : ''
+            "--alignIntronMax ${params.star_max_intron_length}",
+            params.star_align_extra_args ? params.star_align_extra_args.split("\\s(?=--)") : ''
         ].flatten().unique(false).join(' ').trim()
         ext.prefix = { "${meta.id}" }
         publishDir = [
             path: { "${params.outdir}/star/alignment" },
             mode: "copy",
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-            enabled: params.star_align.save_outputs
+            enabled: params.star_save_outputs
         ]
     }
 
     withName: BRAKER3 {
         ext.args = [
             "--gff3",
-            params.braker.extra_braker_args ? params.braker.extra_braker_args.split("\\s(?=--)") : ''
+            params.braker_extra_args ? params.braker_extra_args.split("\\s(?=--)") : ''
         ].flatten().unique(false).join(' ').trim()
         ext.prefix = { "${meta.id}" }
         publishDir = [
@@ -132,7 +132,7 @@ process {
     }
 }
 
-if(params.liftoff.xref_annotations) {
+if(params.liftoff_xref_annotations) {
     process {
         withName: LIFTOFF {
             ext.args = '-exclude_partial -copies'
diff --git a/modules/local/edta/shorten_edta_ids/main.nf b/modules/local/edta/shorten_edta_ids/main.nf
index e216ce4..43b94f0 100644
--- a/modules/local/edta/shorten_edta_ids/main.nf
+++ b/modules/local/edta/shorten_edta_ids/main.nf
@@ -27,18 +27,4 @@ process SHORTEN_EDTA_IDS {
         shorten_fasta_ids: \$(md5sum \$(which shorten_fasta_ids.py) | cut -d' ' -f1)
     END_VERSIONS
     """
-    
-    stub:
-    """
-    FILE="$fasta_file"
-    output_prefix="\${FILE%.*}"
-
-    touch "\${output_prefix}.renamed.ids.fa"
-    touch "\${output_prefix}.renamed.ids.tsv"
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        shorten_fasta_ids: \$(md5sum \$(which shorten_fasta_ids.py) | cut -d' ' -f1)
-    END_VERSIONS
-    """
 }
\ No newline at end of file
diff --git a/modules/local/validate_params/main.nf b/modules/local/validate_params/main.nf
index 7b5697f..e86302e 100644
--- a/modules/local/validate_params/main.nf
+++ b/modules/local/validate_params/main.nf
@@ -3,6 +3,12 @@ def validateParams(params) {
     validateTETags(params)
 
     validateTEFastaCorrespondence(params)
+
+    if (params.remove_ribo_rna) {
+        ch_ribo_db = file(params.ribo_database_manifest, checkIfExists: true)
+        
+        if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"}
+    }
 }
 
 def validateFastaTags(params) {
diff --git a/nextflow.config b/nextflow.config
index daf1eef..3c630e9 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -5,85 +5,65 @@ params {
         ["red5_v2p1", "/workspace/hrauxr/pan-gene/.test/red5_v2p1_chr1.fasta"],
         ["donghong", "/workspace/hrauxr/pan-gene/.test/donghong.chr1.fsa.gz"]
     ]
-    // FASTA files (fasta, fasta.gz) for the assemblies to annotate
-    //
-    // Pattern:               [["tag", "file path"]]
-    // Permissible tags:      tag, tag_1, tag_tag2_3, tag_tag2_tag3;
-    //                        Any name with alphanumeric characters including "_".
-    //                        "." is not allowed in the tag name
-    //                        Unique, short tags are recommended.
-    //                        Otherwise, some of the plots in the report may not display correctly.
-    // Examples:
-    // target_assemblies        = [["tag1", "./a/relative/path/to/the/fasta/file.fasta"],
-    //                              ["tag2", "./a/relative/path/to/the/fasta/file2.fasta"],
-    //                              ["tag3", "https://ftp.ncbi.nlm.nih.gov/genomes/test_genome.fna"], ...]
-    // target_assemblies        = [["tair10", "/an/absolute/path/to/the/fasta/file.fasta"]]
+    // Pattern:             [ [tag, fasta(.gz) ] ]
+    // Permissible tags:    tag, tag_1, tag_tag2_3, tag_tag2_tag3;
+    //                      Any name with alphanumeric characters including "_".
+    //                      "." is not allowed in the tag name
     
     te_libraries                = [
         ["donghong", "/workspace/hrauxr/pan-gene/.test/donghong.TElib.fa.gz"]
     ]
-    // TE libs (fasta, fasta.gz) for target_assemblies
-    //
-    // Optional                 Set to [] if libraries are not available, te_libraries = []
+    // Pattern:             [ [tag, fasta(.gz) ] ]
+    // Optional             Set to null if libraries are not available.
     //
     // Each TE library should have an associated (by tag) assembly in target_assemblies.
     // Not all target_assemblies need to have an associated (by tag) TE library.
     // When the TE lib is not available for a traget assembly, EDTA is used to create one.
     
-    edta {
-        is_sensitive            = false
-        save_outputs            = true
-    }
-    repeatmasker {
-        save_outputs            = true
-    }
+    edta_is_sensitive           = false
+    edta_save_outputs           = false
+    
+    repeatmasker_save_outputs   = true
     
     samplesheet                 = "./.test/samplesheet.csv"
     // Optional: Set to null if not available
 
-    sample_prep {
-        skip_fastqc             = false
-        skip_fastp              = false
-        min_trimmed_reads       = 10000
-        extra_fastp_args        = ""
+    skip_fastqc                 = false
+    skip_fastp                  = false
+    min_trimmed_reads           = 10000
+    extra_fastp_args            = ""
 
-        save_trimmed            = false
-        // toggling this parameter results in rerun of FASTP and FASTQC_TRIM
+    save_trimmed                = false
+    // toggling this parameter results in rerun of FASTP and FASTQC_TRIM
 
-        remove_ribo_rna         = true
-        save_non_ribo_reads     = false
-        ribo_database_manifest  = "${projectDir}/assets/rrna-db-defaults.txt"
-    }
+    remove_ribo_rna             = true
+    save_non_ribo_reads         = false
+    ribo_database_manifest      = "${projectDir}/assets/rrna-db-defaults.txt"
 
-    star_align {
-        max_intron_length       = 16000
-        extra_star_align_args   = ""
-        save_outputs            = false
-    }
+    star_max_intron_length      = 16000
+    star_align_extra_args       = ""
+    star_save_outputs           = false
 
-    external_protein_seqs       = [
+    external_protein_fastas     = [
         "/workspace/ComparativeDataSources/OrthoDB11/Viridiplantae.fa.gz",
         "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.pep.fasta"
     ]
     // Optional: Set to null if not available
 
-    braker {
-        extra_braker_args       = ""
-    }
+    braker_extra_args           = ""
 
-    liftoff {
-        xref_annotations        = [
-            [
-                "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/Russell_V2a.chromosomes.and.unassiged.and.haplotigs.fsa",
-                "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.gff3"
-            ],
-            [
-                "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_chr_all.fas",
-                "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_GFF3_genes_transposons.fixed.gff3"
-            ]
+    liftoff_xref_annotations    = [
+        [
+            "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/Russell_V2a.chromosomes.and.unassiged.and.haplotigs.fsa",
+            "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.gff3"
+        ],
+        [
+            "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_chr_all.fas",
+            "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_GFF3_genes_transposons.fixed.gff3"
         ]
-        // Optional: Set to null if not available
-    }
+    ]
+    // Format:      [ [ fasta(.gz), gff3(.gz) ] ]
+    // Optional:    Set to null if not available
 
     outdir                      = "./results"
 
diff --git a/subworkflows/local/align_rnaseq.nf b/subworkflows/local/align_rnaseq.nf
index be7f026..c0a9039 100644
--- a/subworkflows/local/align_rnaseq.nf
+++ b/subworkflows/local/align_rnaseq.nf
@@ -21,8 +21,9 @@ workflow ALIGN_RNASEQ {
     | map { assembly, meta, fastq, index -> [meta, fastq, index] }
     | set { ch_star_inputs }
 
-    def seq_platform = false
-    def seq_center = false
+    def star_ignore_sjdbgtf = true
+    def seq_platform        = false
+    def seq_center          = false
     STAR_ALIGN(
         ch_star_inputs.map { meta, fastq, index -> [meta, fastq] },
         ch_star_inputs.map { meta, fastq, index -> [[id: meta.target_assembly], index] },
diff --git a/subworkflows/local/prepare_assembly.nf b/subworkflows/local/prepare_assembly.nf
index 1cecb72..7469afc 100644
--- a/subworkflows/local/prepare_assembly.nf
+++ b/subworkflows/local/prepare_assembly.nf
@@ -4,7 +4,7 @@ include { FASTA_VALIDATE                        } from '../../modules/local/fast
 include { REPEATMASKER                          } from '../../modules/kherronism/repeatmasker'
 include { STAR_GENOMEGENERATE                   } from '../../modules/nf-core/star/genomegenerate'
 
-include { FASTA_EDTA                            } from '../subworkflows/local/fasta_edta'
+include { FASTA_EDTA                            } from '../../subworkflows/local/fasta_edta'
 
 workflow PREPARE_ASSEMBLY {
     take:
@@ -77,8 +77,8 @@ workflow PREPARE_ASSEMBLY {
     // MODULE: STAR_GENOMEGENERATE
     def star_ignore_sjdbgtf = true
     STAR_GENOMEGENERATE(
-        REPEATMASKER.out.fasta_masked,
-        REPEATMASKER.out.fasta_masked.map { meta, maskedFasta -> [meta, []] },
+        ch_validated_target_assembly,
+        ch_validated_target_assembly.map { meta, maskedFasta -> [meta, []] },
         star_ignore_sjdbgtf
     )
     .index
@@ -94,7 +94,7 @@ workflow PREPARE_ASSEMBLY {
     | set { ch_versions }
     
     emit:
-    target_assemby                                          // channel: [ meta, fasta ]
+    target_assemby          = ch_validated_target_assembly  // channel: [ meta, fasta ]
     masked_target_assembly  = REPEATMASKER.out.fasta_masked // channel: [ meta, fasta ]
     target_assemby_index    = ch_assembly_index             // channel: [ meta, star_index ]
     versions                = ch_versions                   // channel: [ versions.yml ]
diff --git a/subworkflows/local/prepare_ext_prots.nf b/subworkflows/local/prepare_ext_prots.nf
new file mode 100644
index 0000000..5109064
--- /dev/null
+++ b/subworkflows/local/prepare_ext_prots.nf
@@ -0,0 +1,40 @@
+include { GUNZIP                        } from '../../modules/nf-core/gunzip'
+include { CAT_CAT as CAT_PROTEIN_FASTAS } from '../../modules/nf-core/cat/cat'
+
+workflow PREPARE_EXT_PROTS {
+    take:
+    ch_ext_prot_fastas          // Channel: [ meta, fasta ]
+    
+    main:
+    ch_ext_prot_fastas
+    | branch { meta, file ->
+        gz: "$file".endsWith(".gz")
+        rest: !"$file".endsWith(".gz")
+    }
+    | set { ch_ext_prot_seqs_branch }
+
+    // MODULE: GUNZIP
+    GUNZIP(
+        ch_ext_prot_seqs_branch.gz
+    )
+    .gunzip
+    | mix(
+        ch_ext_prot_seqs_branch.rest
+    )
+    | set { ch_ext_prot_gunzip_fastas }
+
+    // MODULE: CAT_PROTEIN_FASTAS
+    ch_ext_prot_gunzip_fastas
+    | map { meta, filePath -> filePath }
+    | collect
+    | map { fileList -> [[id:"ext_protein_seqs"], fileList] }
+    | CAT_PROTEIN_FASTAS
+
+    GUNZIP.out.versions.first()
+    | mix(CAT_PROTEIN_FASTAS.out.versions)
+    | set { ch_versions }
+    
+    emit:
+    ext_prots_fasta = CAT_PROTEIN_FASTAS.out.file_out   // Channel: [ meta, fasta ]
+    versions        = ch_versions                       // Channel: [ versions.yml ]
+}
\ No newline at end of file
diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf
index 8223910..7a82786 100644
--- a/subworkflows/local/preprocess_rnaseq.nf
+++ b/subworkflows/local/preprocess_rnaseq.nf
@@ -19,7 +19,7 @@ workflow PREPROCESS_RNASEQ {
     // SUBWORKFLOW: EXTRACT_SAMPLES
     EXTRACT_SAMPLES(
         samplesheet,
-        ch_permissible_target_assemblies
+        permissible_target_assemblies
     )
     .reads
     | map { meta, fastq ->
diff --git a/workflows/pan_gene.nf b/workflows/pan_gene.nf
index a0dc6e8..37e5920 100644
--- a/workflows/pan_gene.nf
+++ b/workflows/pan_gene.nf
@@ -1,8 +1,5 @@
 nextflow.enable.dsl=2
 
-
-include { GUNZIP as GUNZIP_EXTERNAL_PROTEIN_SEQ } from '../modules/nf-core/gunzip'
-include { CAT_CAT as CAT_PROTEIN_SEQS           } from '../modules/nf-core/cat/cat'
 include { BRAKER3                               } from '../modules/kherronism/braker3'
 include { GUNZIP as GUNZIP_XREF_FASTA           } from '../modules/nf-core/gunzip'
 include { GUNZIP as GUNZIP_XREF_GFF             } from '../modules/nf-core/gunzip'
@@ -11,46 +8,48 @@ include { validateParams                        } from '../modules/local/validat
 include { PREPARE_ASSEMBLY                      } from '../subworkflows/local/prepare_assembly'
 include { PREPROCESS_RNASEQ                     } from '../subworkflows/local/preprocess_rnaseq'
 include { ALIGN_RNASEQ                          } from '../subworkflows/local/align_rnaseq'
+include { PREPARE_EXT_PROTS                     } from '../subworkflows/local/prepare_ext_prots'
 
 validateParams(params)
 
-// Additional validation
-// Check rRNA databases for sortmerna
-if (params.sample_prep.remove_ribo_rna) {
-    ch_ribo_db = file(params.sample_prep.ribo_database_manifest, checkIfExists: true)
-    if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"}
-}
-
 workflow PAN_GENE {
 
-    // Versions
-    ch_versions = Channel.empty()
-
-    // Input channels
-    Channel.fromList(params.target_assemblies)
-    | map { tag, filePath ->
-        [[id:tag], file(filePath, checkIfExists: true)]
-    }
-    | set { ch_target_assembly }
-
-    Channel.fromList(params.te_libraries)
-    | map { tag, filePath ->
-        [[id:tag], file(filePath, checkIfExists: true)]
-    }
-    | set { ch_te_library }
-
-    ch_samplesheet = Channel.empty()
-    if(params.samplesheet) {
-        ch_samplesheet = Channel.fromPath(params.samplesheet)
-    }
-    
-    Channel.of(params.target_assemblies.collect { tag, fastaPath -> tag.strip() }.join(","))
-    | set { ch_permissible_target_assemblies }
+    ch_versions                 = Channel.empty()
 
-    Channel.from(ch_ribo_db.readLines())
-    | map { row -> file(row, checkIfExists: true) }
-    | collect
-    | set { ch_sortmerna_fastas }
+    ch_target_assembly          = Channel.fromList(params.target_assemblies)
+                                | map { tag, filePath ->
+                                    [[id:tag], file(filePath, checkIfExists: true)]
+                                }
+
+    ch_te_library               = Channel.fromList(params.te_libraries)
+                                | map { tag, filePath ->
+                                    [[id:tag], file(filePath, checkIfExists: true)]
+                                }
+
+    ch_samplesheet              = params.samplesheet
+                                ? Channel.fromPath(params.samplesheet, checkIfExists: true)
+                                : Channel.empty()
+    
+    ch_tar_assm_str             = Channel.of(
+                                    params.target_assemblies
+                                    .collect { tag, fastaPath -> tag.strip() }.join(",")
+                                )
+
+    ch_ribo_db                  = params.remove_ribo_rna
+                                ? file(params.ribo_database_manifest, checkIfExists: true)
+                                : Channel.empty()
+
+    ch_sortmerna_fastas         = Channel.from(ch_ribo_db.readLines())
+                                | map { row -> file(row, checkIfExists: true) }
+                                | collect
+
+    ch_ext_prot_fastas          = (params.external_protein_fastas
+                                ? Channel.fromList(params.external_protein_fastas)
+                                : Channel.empty())
+                                | map { filePath ->
+                                    def fileHandle = file(filePath, checkIfExists: true)
+                                    [[id:fileHandle.getSimpleName()], fileHandle]
+                                }
 
     // SUBWORKFLOW: PREPARE_ASSEMBLY
     PREPARE_ASSEMBLY(
@@ -66,7 +65,7 @@ workflow PAN_GENE {
     // SUBWORKFLOW: PREPROCESS_RNASEQ
     PREPROCESS_RNASEQ(
         ch_samplesheet,
-        ch_permissible_target_assemblies,
+        ch_tar_assm_str,
         params.skip_fastqc,
         params.skip_fastp,
         params.save_trimmed,
@@ -79,147 +78,115 @@ workflow PAN_GENE {
     ch_reads_target             = PREPROCESS_RNASEQ.out.reads_target
     ch_versions                 = ch_versions.mix(PREPROCESS_RNASEQ.out.versions)
 
-    // SUBWORKFLOW: STAR_ALIGN
+    // SUBWORKFLOW: ALIGN_RNASEQ
     ALIGN_RNASEQ(
         ch_reads_target,
         ch_trim_reads,
         ch_target_assemby_index
     )
 
-    // MODULE: GUNZIP_EXTERNAL_PROTEIN_SEQ
-    ch_ext_prot_seqs = Channel.empty()
-    if(params.external_protein_seqs) {
-        ch_ext_prot_seqs = Channel.fromList(params.external_protein_seqs)
-    }
-    
-    ch_ext_prot_seqs
-    | map { filePath ->
-        def fileHandle = file(filePath, checkIfExists: true)
-        [[id:fileHandle.getSimpleName()], fileHandle]
-    }
-    | branch { meta, file ->
-        gz: "$file".endsWith(".gz")
-        rest: !"$file".endsWith(".gz")
-    }
-    | set { ch_ext_prot_seqs_branch }
-
-    GUNZIP_EXTERNAL_PROTEIN_SEQ(
-        ch_ext_prot_seqs_branch.gz
-    )
-    .gunzip
-    | mix(
-        ch_ext_prot_seqs_branch.rest
-    )
-    | set { ch_ext_prot_seqs }
+    ch_rnaseq_bam               = ALIGN_RNASEQ.out.bam
+    ch_versions                 = ch_versions.mix(ALIGN_RNASEQ.out.versions)
 
-    ch_versions = ch_versions.mix(GUNZIP_EXTERNAL_PROTEIN_SEQ.out.versions.first())
+    // MODULE: PREPARE_EXT_PROTS
+    PREPARE_EXT_PROTS(
+        ch_ext_prot_fastas
+    )
 
-    // MODULE: CAT_PROTEIN_SEQS
-    ch_ext_prot_seqs
-    | map { meta, filePath -> filePath }
-    | collect
-    | map { fileList -> [[id:"protein_seqs"], fileList] }
-    | CAT_PROTEIN_SEQS
-    
-    ch_ext_prot_seqs = CAT_PROTEIN_SEQS.out.file_out
-    ch_versions = ch_versions.mix(CAT_PROTEIN_SEQS.out.versions)
+    ch_ext_prots_fasta          = PREPARE_EXT_PROTS.out.ext_prots_fasta
+    ch_versions                 = ch_versions.mix(PREPARE_EXT_PROTS.out.versions)
 
     // MODULE: BRAKER3
-    REPEATMASKER.out.fasta_masked
-    | mix(ch_samtools_bam)
-    | groupTuple(size: 2, remainder: true)
-    | map { meta, groupedItems ->
-        def maskedFasta = groupedItems[0]
-
-        if(groupedItems.size() == 2) {
-            def bam = groupedItems[1]
-            return [meta, maskedFasta, bam]
-        } else {
-            return [meta, maskedFasta, []]
-        }
-    }
-    | set { ch_braker_inputs }
+    // ch_braker_inputs            =  REPEATMASKER.out.fasta_masked
+    //                             | mix(ch_rnaseq_bam)
+    //                             | groupTuple(size: 2, remainder: true)
+    //                             | map { meta, groupedItems ->
+    //                                 def maskedFasta = groupedItems[0]
+    //                                 def bam         = (groupedItems.size() == 2) ? groupedItems[1] : []
+                                    
+    //                                 [meta, maskedFasta, bam]
+    //                             }
     
-    if(params.external_protein_seqs) {
-        ch_braker_inputs
-        | combine(ch_ext_prot_seqs.map{meta, filePath -> filePath})
-        | set { ch_braker_inputs }
-    } else {
-        ch_braker_inputs
-        | map{meta, assembly, bam -> [meta, assembly, bam, []]}
-        | set { ch_braker_inputs }
-    }
+    // if(params.external_protein_fastas) {
+    //     ch_braker_inputs
+    //     | combine(ch_ext_prot_seqs.map{meta, filePath -> filePath})
+    //     | set { ch_braker_inputs }
+    // } else {
+    //     ch_braker_inputs
+    //     | map{meta, assembly, bam -> [meta, assembly, bam, []]}
+    //     | set { ch_braker_inputs }
+    // }
     
-    ch_fasta            = ch_braker_inputs.map { meta, assembly, bam, proteinSeq -> [meta, assembly] }
-    ch_bam              = ch_braker_inputs.map { meta, assembly, bam, proteinSeq -> bam }
-    ch_proteins         = ch_braker_inputs.map { meta, assembly, bam, proteinSeq -> proteinSeq }
-    ch_rnaseq_sets_dirs = []
-    ch_rnaseq_sets_ids  = []
-    ch_hintsfile        = []
-
-    BRAKER3(
-        ch_fasta,
-        ch_bam,
-        ch_rnaseq_sets_dirs,
-        ch_rnaseq_sets_ids,
-        ch_proteins,
-        ch_hintsfile
-    )
+    // ch_fasta            = ch_braker_inputs.map { meta, assembly, bam, proteinSeq -> [meta, assembly] }
+    // ch_bam              = ch_braker_inputs.map { meta, assembly, bam, proteinSeq -> bam }
+    // ch_proteins         = ch_braker_inputs.map { meta, assembly, bam, proteinSeq -> proteinSeq }
+    // ch_rnaseq_sets_dirs = []
+    // ch_rnaseq_sets_ids  = []
+    // ch_hintsfile        = []
+
+    // BRAKER3(
+    //     ch_fasta,
+    //     ch_bam,
+    //     ch_rnaseq_sets_dirs,
+    //     ch_rnaseq_sets_ids,
+    //     ch_proteins,
+    //     ch_hintsfile
+    // )
 
-    ch_versions = ch_versions.mix(BRAKER3.out.versions.first())
-
-    // MODULE: GUNZIP_XREF_FASTA
-    ch_xref_annotations = Channel.empty()
-    if(params.liftoff.xref_annotations) {
-        Channel.fromList(params.liftoff.xref_annotations)
-        | multiMap { fasta, gff ->
-            def fastaFile = file(fasta, checkIfExists:true)
-            def meta = [id:fastaFile.getSimpleName()]
-
-            fasta: [meta, fastaFile]
-            gff: [meta, file(gff, checkIfExists:true)]
-        }
-        | set { ch_xref_annotations }
-    }
-
-    ch_xref_annotations.fasta
-    | branch { meta, file ->
-        gz: "$file".endsWith(".gz")
-        rest: !"$file".endsWith(".gz")
-    }
-    | set { ch_xref_annotations_branch }
-
-    GUNZIP_XREF_FASTA(
-        ch_xref_annotations_branch.gz
-    )
-    .gunzip
-    | mix(
-        ch_xref_annotations_branch.rest
-    )
-    | set { ch_xref_annotations_fasta }
-
-    // MODULE: GUNZIP_XREF_GFF
-    ch_xref_annotations.gff
-    | branch { meta, file ->
-        gz: "$file".endsWith(".gz")
-        rest: !"$file".endsWith(".gz")
-    }
-    | set { ch_xref_annotations_gff_branch }
-
-    GUNZIP_XREF_GFF(
-        ch_xref_annotations_gff_branch.gff.map { meta, fasta, gff -> [meta, gff] }
-    )
-    .gunzip
-    | mix(
-        ch_xref_annotations_gff_branch.rest.map { meta, fasta, gff -> [meta, gff] }
-    )
-    | set { ch_xref_annotations_gff }
+    // ch_versions = ch_versions.mix(BRAKER3.out.versions.first())
+
+    // // MODULE: GUNZIP_XREF_FASTA
+    // ch_xref_annotations = Channel.empty()
+    // if(params.liftoff_xref_annotations) {
+    //     Channel.fromList(params.liftoff_xref_annotations)
+    //     | multiMap { fasta, gff ->
+    //         def fastaFile = file(fasta, checkIfExists:true)
+    //         def meta = [id:fastaFile.getSimpleName()]
+
+    //         fasta: [meta, fastaFile]
+    //         gff: [meta, file(gff, checkIfExists:true)]
+    //     }
+    //     | set { ch_xref_annotations }
+    // }
+
+    // ch_xref_annotations.fasta
+    // | branch { meta, file ->
+    //     gz: "$file".endsWith(".gz")
+    //     rest: !"$file".endsWith(".gz")
+    // }
+    // | set { ch_xref_annotations_branch }
+
+    // GUNZIP_XREF_FASTA(
+    //     ch_xref_annotations_branch.gz
+    // )
+    // .gunzip
+    // | mix(
+    //     ch_xref_annotations_branch.rest
+    // )
+    // | set { ch_xref_annotations_fasta }
+
+    // // MODULE: GUNZIP_XREF_GFF
+    // ch_xref_annotations.gff
+    // | branch { meta, file ->
+    //     gz: "$file".endsWith(".gz")
+    //     rest: !"$file".endsWith(".gz")
+    // }
+    // | set { ch_xref_annotations_gff_branch }
+
+    // GUNZIP_XREF_GFF(
+    //     ch_xref_annotations_gff_branch.gff.map { meta, fasta, gff -> [meta, gff] }
+    // )
+    // .gunzip
+    // | mix(
+    //     ch_xref_annotations_gff_branch.rest.map { meta, fasta, gff -> [meta, gff] }
+    // )
+    // | set { ch_xref_annotations_gff }
 
-    ch_xref_annotations_fasta
-    | join(
-        ch_xref_annotations_gff
-    )
-    | set { ch_xref_annotations }
+    // ch_xref_annotations_fasta
+    // | join(
+    //     ch_xref_annotations_gff
+    // )
+    // | set { ch_xref_annotations }
 
     // // MODULE: LIFTOFF
     // ch_xref_annotations

From 20317902969f86650d491fad1d3b916cb7677e2c Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Mon, 13 Nov 2023 13:08:23 +1300
Subject: [PATCH 11/59] Extracted subworkflows up to BRAKER3

---
 modules/local/validate_params/main.nf   | 13 +++++-
 nextflow.config                         |  2 +-
 subworkflows/local/prepare_ext_prots.nf |  3 +-
 workflows/pan_gene.nf                   | 59 +++++++++----------------
 4 files changed, 36 insertions(+), 41 deletions(-)

diff --git a/modules/local/validate_params/main.nf b/modules/local/validate_params/main.nf
index e86302e..5933dfe 100644
--- a/modules/local/validate_params/main.nf
+++ b/modules/local/validate_params/main.nf
@@ -5,9 +5,9 @@ def validateParams(params) {
     validateTEFastaCorrespondence(params)
 
     if (params.remove_ribo_rna) {
-        ch_ribo_db = file(params.ribo_database_manifest, checkIfExists: true)
+        file_ribo_db = file(params.ribo_database_manifest, checkIfExists: true)
         
-        if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"}
+        if (file_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${file_ribo_db.getName()}!"}
     }
 }
 
@@ -32,6 +32,11 @@ def validateFastaTags(params) {
 }
 
 def validateTETags(params) {
+
+    if(!params["te_libraries"]) {
+        return
+    }
+
     def listOfTETuples   = params["te_libraries"]
 
     if (listOfTETuples.isEmpty()) {
@@ -52,6 +57,10 @@ def validateTETags(params) {
 }
 
 def validateTEFastaCorrespondence(params) {
+
+    if(!params["te_libraries"]) {
+        return
+    }
     
     def listOfTETuples   = params["te_libraries"]
     def listOfFastaTuples   = params["target_assemblies"]
diff --git a/nextflow.config b/nextflow.config
index 3c630e9..002f73a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -36,7 +36,7 @@ params {
     save_trimmed                = false
     // toggling this parameter results in rerun of FASTP and FASTQC_TRIM
 
-    remove_ribo_rna             = true
+    remove_ribo_rna             = false
     save_non_ribo_reads         = false
     ribo_database_manifest      = "${projectDir}/assets/rrna-db-defaults.txt"
 
diff --git a/subworkflows/local/prepare_ext_prots.nf b/subworkflows/local/prepare_ext_prots.nf
index 5109064..d14c60b 100644
--- a/subworkflows/local/prepare_ext_prots.nf
+++ b/subworkflows/local/prepare_ext_prots.nf
@@ -30,7 +30,8 @@ workflow PREPARE_EXT_PROTS {
     | map { fileList -> [[id:"ext_protein_seqs"], fileList] }
     | CAT_PROTEIN_FASTAS
 
-    GUNZIP.out.versions.first()
+    Channel.empty()
+    | mix(GUNZIP.out.versions.first())
     | mix(CAT_PROTEIN_FASTAS.out.versions)
     | set { ch_versions }
     
diff --git a/workflows/pan_gene.nf b/workflows/pan_gene.nf
index 37e5920..acfc77a 100644
--- a/workflows/pan_gene.nf
+++ b/workflows/pan_gene.nf
@@ -37,9 +37,9 @@ workflow PAN_GENE {
 
     ch_ribo_db                  = params.remove_ribo_rna
                                 ? file(params.ribo_database_manifest, checkIfExists: true)
-                                : Channel.empty()
+                                : null
 
-    ch_sortmerna_fastas         = Channel.from(ch_ribo_db.readLines())
+    ch_sortmerna_fastas         = Channel.from(ch_ribo_db ? ch_ribo_db.readLines() : null)
                                 | map { row -> file(row, checkIfExists: true) }
                                 | collect
 
@@ -97,43 +97,28 @@ workflow PAN_GENE {
     ch_versions                 = ch_versions.mix(PREPARE_EXT_PROTS.out.versions)
 
     // MODULE: BRAKER3
-    // ch_braker_inputs            =  REPEATMASKER.out.fasta_masked
-    //                             | mix(ch_rnaseq_bam)
-    //                             | groupTuple(size: 2, remainder: true)
-    //                             | map { meta, groupedItems ->
-    //                                 def maskedFasta = groupedItems[0]
-    //                                 def bam         = (groupedItems.size() == 2) ? groupedItems[1] : []
-                                    
-    //                                 [meta, maskedFasta, bam]
-    //                             }
-    
-    // if(params.external_protein_fastas) {
-    //     ch_braker_inputs
-    //     | combine(ch_ext_prot_seqs.map{meta, filePath -> filePath})
-    //     | set { ch_braker_inputs }
-    // } else {
-    //     ch_braker_inputs
-    //     | map{meta, assembly, bam -> [meta, assembly, bam, []]}
-    //     | set { ch_braker_inputs }
-    // }
+    ch_braker_inputs            = ch_masked_target_assembly
+                                | join(ch_rnaseq_bam, remainder: true)
+                                | combine(
+                                    ch_ext_prots_fasta.map { meta, filePath -> filePath }.ifEmpty(null)
+                                )
+                                | map { meta, fasta, bam, prots -> [meta, fasta, bam ?: [], prots ?: []] }
     
-    // ch_fasta            = ch_braker_inputs.map { meta, assembly, bam, proteinSeq -> [meta, assembly] }
-    // ch_bam              = ch_braker_inputs.map { meta, assembly, bam, proteinSeq -> bam }
-    // ch_proteins         = ch_braker_inputs.map { meta, assembly, bam, proteinSeq -> proteinSeq }
-    // ch_rnaseq_sets_dirs = []
-    // ch_rnaseq_sets_ids  = []
-    // ch_hintsfile        = []
-
-    // BRAKER3(
-    //     ch_fasta,
-    //     ch_bam,
-    //     ch_rnaseq_sets_dirs,
-    //     ch_rnaseq_sets_ids,
-    //     ch_proteins,
-    //     ch_hintsfile
-    // )
+    def rnaseq_sets_dirs        = []
+    def rnaseq_sets_ids         = []
+    def hintsfile               = []
+
+    BRAKER3(
+        ch_braker_inputs.map { meta, fasta, bam, prots -> [meta, fasta] },
+        ch_braker_inputs.map { meta, fasta, bam, prots -> bam },
+        rnaseq_sets_dirs,
+        rnaseq_sets_ids,
+        ch_braker_inputs.map { meta, fasta, bam, prots -> prots },
+        hintsfile
+    )
 
-    // ch_versions = ch_versions.mix(BRAKER3.out.versions.first())
+    ch_braker_gff3              = BRAKER3.out.gff3
+    ch_versions                 = ch_versions.mix(BRAKER3.out.versions.first())
 
     // // MODULE: GUNZIP_XREF_FASTA
     // ch_xref_annotations = Channel.empty()

From f3154677339b8bff09646220772dda71a39587fd Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 14 Nov 2023 09:30:31 +1300
Subject: [PATCH 12/59] Inc liftoff

---
 conf/manifest.config                          |  10 ++
 conf/modules.config                           |  13 +-
 modules/local/liftoff/main.nf                 |  17 +--
 modules/local/validate_params/main.nf         |  28 ++++-
 modules/nf-core/CHANGELOG.md                  |   2 +-
 .../dumpsoftwareversions/environment.yml      |   6 +
 .../custom/dumpsoftwareversions/main.nf       |  24 ++++
 .../custom/dumpsoftwareversions/meta.yml      |  37 ++++++
 .../templates/dumpsoftwareversions.py         | 101 ++++++++++++++++
 .../dumpsoftwareversions/tests/main.nf.test   |  38 ++++++
 .../tests/main.nf.test.snap                   |  27 +++++
 .../dumpsoftwareversions/tests/tags.yml       |   2 +
 nextflow.config                               |   5 +-
 pan_gene_pfr.sh                               |   2 +-
 subworkflows/local/fasta_liftoff.nf           |  79 ++++++++++++
 workflows/pan_gene.nf                         | 114 +++++++-----------
 16 files changed, 415 insertions(+), 90 deletions(-)
 create mode 100644 conf/manifest.config
 create mode 100644 modules/nf-core/custom/dumpsoftwareversions/environment.yml
 create mode 100644 modules/nf-core/custom/dumpsoftwareversions/main.nf
 create mode 100644 modules/nf-core/custom/dumpsoftwareversions/meta.yml
 create mode 100755 modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
 create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
 create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
 create mode 100644 subworkflows/local/fasta_liftoff.nf

diff --git a/conf/manifest.config b/conf/manifest.config
new file mode 100644
index 0000000..7bf1f6b
--- /dev/null
+++ b/conf/manifest.config
@@ -0,0 +1,10 @@
+manifest {
+    name                    = 'pan-gene'
+    author                  = """Usman Rashid"""
+    homePage                = 'https://github.com/PlantandFoodResearch/pan-gene'
+    description             = """A NextFlow pipeline for pan-genome annotation"""
+    mainScript              = 'main.nf'
+    nextflowVersion         = '!>=22.10.4'
+    version                 = '0.1'
+    doi                     = ''
+}
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
index 6683fcc..8c99be5 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -135,7 +135,7 @@ process {
 if(params.liftoff_xref_annotations) {
     process {
         withName: LIFTOFF {
-            ext.args = '-exclude_partial -copies'
+            ext.args = '-exclude_partial -copies -polish'
             publishDir = [
                 path: { "${params.outdir}/liftoff/${meta.id}" },
                 mode: "copy",
@@ -143,4 +143,15 @@ if(params.liftoff_xref_annotations) {
             ]
         }
     }
+}
+
+process {
+    withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' {
+        publishDir = [
+            path: params.outdir,
+            pattern: "software_versions.yml",
+            mode: "copy",
+            enabled: true
+        ]
+    }
 }
\ No newline at end of file
diff --git a/modules/local/liftoff/main.nf b/modules/local/liftoff/main.nf
index cec7bd1..5bfb6f2 100644
--- a/modules/local/liftoff/main.nf
+++ b/modules/local/liftoff/main.nf
@@ -2,7 +2,7 @@ process LIFTOFF {
     tag "$meta.id"
     label "process_high"
 
-    container "https://depot.galaxyproject.org/singularity/liftoff:1.6.3--pyhdfd78af_0"
+    container 'https://depot.galaxyproject.org/singularity/liftoff:1.6.3--pyhdfd78af_0'
 
     input:
     tuple val(meta), path(target_fa)
@@ -10,8 +10,8 @@ process LIFTOFF {
     path ref_gff
     
     output:
-    tuple val(meta), path("*.liftoff.gff3")         , emit: gff3
-    tuple val(meta), path("unmapped_features.txt")  , emit: unmapped
+    tuple val(meta), path("*.gff3")                 , emit: gff3
+    tuple val(meta), path("*.unmapped.txt")         , emit: unmapped
     path "versions.yml"                             , emit: versions
 
     when:
@@ -24,25 +24,28 @@ process LIFTOFF {
     liftoff \\
     -g $ref_gff \\
     -p $task.cpus \\
+    -o "${prefix}.gff3" \\
+    -u "${prefix}.unmapped.txt" \\
     $args \\
     $target_fa \\
     $ref_fa \\
-    > "${prefix}.liftoff.gff3"
+    2> liftoff.stderr
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        liftoff: \$(liftoff --version)
+        liftoff: \$(liftoff --version 2> /dev/null)
     END_VERSIONS
     """
     
     stub:
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    touch "${prefix}.liftoff.gff3"
+    touch "${prefix}.gff3"
+    touch "${prefix}.unmapped.txt"
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        liftoff: \$(liftoff --version)
+        liftoff: \$(liftoff --version 2> /dev/null)
     END_VERSIONS
     """
 }
\ No newline at end of file
diff --git a/modules/local/validate_params/main.nf b/modules/local/validate_params/main.nf
index 5933dfe..5eb6207 100644
--- a/modules/local/validate_params/main.nf
+++ b/modules/local/validate_params/main.nf
@@ -1,14 +1,12 @@
 def validateParams(params) {
     validateFastaTags(params)
+    
     validateTETags(params)
-
     validateTEFastaCorrespondence(params)
 
-    if (params.remove_ribo_rna) {
-        file_ribo_db = file(params.ribo_database_manifest, checkIfExists: true)
-        
-        if (file_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${file_ribo_db.getName()}!"}
-    }
+    validateRiboDBManifest(params)
+
+    validateLiftoffXrefs(params)
 }
 
 def validateFastaTags(params) {
@@ -75,6 +73,24 @@ def validateTEFastaCorrespondence(params) {
     }
 }
 
+def validateRiboDBManifest(params) {
+    if (params.remove_ribo_rna) {
+        file_ribo_db = file(params.ribo_database_manifest, checkIfExists: true)
+        
+        if (file_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${file_ribo_db.getName()}!"}
+    }
+}
+
+def validateLiftoffXrefs(params) {
+    if(!params["liftoff_xref_annotations"]) {
+        return
+    }
+
+    if(isNotListOfLists(params["liftoff_xref_annotations"]), 2) {
+        error "Error: liftoff_xref_annotations must be a list of sublists, with each sublist containing 2 elements"
+    }
+}
+
 def isNotListOfLists(thisOne, subListSize) {
     return (!(thisOne instanceof List) || thisOne.isEmpty() || thisOne.any { !(it instanceof List) || it.size() != subListSize })
 }
\ No newline at end of file
diff --git a/modules/nf-core/CHANGELOG.md b/modules/nf-core/CHANGELOG.md
index f7e0034..280bc90 100644
--- a/modules/nf-core/CHANGELOG.md
+++ b/modules/nf-core/CHANGELOG.md
@@ -25,4 +25,4 @@
 1. Added stub
 2. Added author in meta.yml
 
-- Repo: https://github.com/nf-core/modules/tree/18cd2206622dc606bbceea533c7823feb2a251db
\ No newline at end of file
+- Repo: https://github.com/nf-core/modules/tree/71dbe24bee9ad6c013d4dd400d92612f6bf01ab8
\ No newline at end of file
diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
new file mode 100644
index 0000000..9d0e6b2
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::multiqc=1.17
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
new file mode 100644
index 0000000..7685b33
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf
@@ -0,0 +1,24 @@
+process CUSTOM_DUMPSOFTWAREVERSIONS {
+    label 'process_single'
+
+    // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' :
+        'biocontainers/multiqc:1.17--pyhdfd78af_0' }"
+
+    input:
+    path versions
+
+    output:
+    path "software_versions.yml"    , emit: yml
+    path "software_versions_mqc.yml", emit: mqc_yml
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    template 'dumpsoftwareversions.py'
+}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
new file mode 100644
index 0000000..9414c32
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
@@ -0,0 +1,37 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: custom_dumpsoftwareversions
+description: Custom module used to dump software versions within the nf-core pipeline template
+keywords:
+  - custom
+  - dump
+  - version
+tools:
+  - custom:
+      description: Custom module used to dump software versions within the nf-core pipeline template
+      homepage: https://github.com/nf-core/tools
+      documentation: https://github.com/nf-core/tools
+      licence: ["MIT"]
+input:
+  - versions:
+      type: file
+      description: YML file containing software versions
+      pattern: "*.yml"
+output:
+  - yml:
+      type: file
+      description: Standard YML file containing software versions
+      pattern: "software_versions.yml"
+  - mqc_yml:
+      type: file
+      description: MultiQC custom content YML file containing software versions
+      pattern: "software_versions_mqc.yml"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@grst"
+maintainers:
+  - "@drpatelh"
+  - "@grst"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
new file mode 100755
index 0000000..da03340
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+
+"""Provide functions to merge multiple versions.yml files."""
+
+
+import yaml
+import platform
+from textwrap import dedent
+
+
+def _make_versions_html(versions):
+    """Generate a tabular HTML output of all versions for MultiQC."""
+    html = [
+        dedent(
+            """\\
+            <style>
+            #nf-core-versions tbody:nth-child(even) {
+                background-color: #f2f2f2;
+            }
+            </style>
+            <table class="table" style="width:100%" id="nf-core-versions">
+                <thead>
+                    <tr>
+                        <th> Process Name </th>
+                        <th> Software </th>
+                        <th> Version  </th>
+                    </tr>
+                </thead>
+            """
+        )
+    ]
+    for process, tmp_versions in sorted(versions.items()):
+        html.append("<tbody>")
+        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
+            html.append(
+                dedent(
+                    f"""\\
+                    <tr>
+                        <td><samp>{process if (i == 0) else ''}</samp></td>
+                        <td><samp>{tool}</samp></td>
+                        <td><samp>{version}</samp></td>
+                    </tr>
+                    """
+                )
+            )
+        html.append("</tbody>")
+    html.append("</table>")
+    return "\\n".join(html)
+
+
+def main():
+    """Load all version files and generate merged output."""
+    versions_this_module = {}
+    versions_this_module["${task.process}"] = {
+        "python": platform.python_version(),
+        "yaml": yaml.__version__,
+    }
+
+    with open("$versions") as f:
+        versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
+
+    # aggregate versions by the module name (derived from fully-qualified process name)
+    versions_by_module = {}
+    for process, process_versions in versions_by_process.items():
+        module = process.split(":")[-1]
+        try:
+            if versions_by_module[module] != process_versions:
+                raise AssertionError(
+                    "We assume that software versions are the same between all modules. "
+                    "If you see this error-message it means you discovered an edge-case "
+                    "and should open an issue in nf-core/tools. "
+                )
+        except KeyError:
+            versions_by_module[module] = process_versions
+
+    versions_by_module["Workflow"] = {
+        "Nextflow": "$workflow.nextflow.version",
+        "$workflow.manifest.name": "$workflow.manifest.version",
+    }
+
+    versions_mqc = {
+        "id": "software_versions",
+        "section_name": "${workflow.manifest.name} Software Versions",
+        "section_href": "https://github.com/${workflow.manifest.name}",
+        "plot_type": "html",
+        "description": "are collected at run time from the software output.",
+        "data": _make_versions_html(versions_by_module),
+    }
+
+    with open("software_versions.yml", "w") as f:
+        yaml.dump(versions_by_module, f, default_flow_style=False)
+    with open("software_versions_mqc.yml", "w") as f:
+        yaml.dump(versions_mqc, f, default_flow_style=False)
+
+    with open("versions.yml", "w") as f:
+        yaml.dump(versions_this_module, f, default_flow_style=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
new file mode 100644
index 0000000..eec1db1
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
@@ -0,0 +1,38 @@
+nextflow_process {
+
+    name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS"
+    script "../main.nf"
+    process "CUSTOM_DUMPSOFTWAREVERSIONS"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "custom"
+    tag "dumpsoftwareversions"
+    tag "custom/dumpsoftwareversions"
+
+    test("Should run without failures") {
+        when {
+            process {
+                """
+                def tool1_version = '''
+                TOOL1:
+                    tool1: 0.11.9
+                '''.stripIndent()
+
+                def tool2_version = '''
+                TOOL2:
+                    tool2: 1.9
+                '''.stripIndent()
+
+                input[0] = Channel.of(tool1_version, tool2_version).collectFile()
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
new file mode 100644
index 0000000..4274ed5
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
@@ -0,0 +1,27 @@
+{
+    "Should run without failures": {
+        "content": [
+            {
+                "0": [
+                    "software_versions.yml:md5,1c851188476409cda5752ce971b20b58"
+                ],
+                "1": [
+                    "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d"
+                ],
+                "2": [
+                    "versions.yml:md5,3843ac526e762117eedf8825b40683df"
+                ],
+                "mqc_yml": [
+                    "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d"
+                ],
+                "versions": [
+                    "versions.yml:md5,3843ac526e762117eedf8825b40683df"
+                ],
+                "yml": [
+                    "software_versions.yml:md5,1c851188476409cda5752ce971b20b58"
+                ]
+            }
+        ],
+        "timestamp": "2023-11-03T14:43:22.157011"
+    }
+}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
new file mode 100644
index 0000000..405aa24
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
@@ -0,0 +1,2 @@
+custom/dumpsoftwareversions:
+  - modules/nf-core/custom/dumpsoftwareversions/**
diff --git a/nextflow.config b/nextflow.config
index 002f73a..574bf39 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -67,10 +67,11 @@ params {
 
     outdir                      = "./results"
 
-    max_cpus                    = 1
-    max_memory                  = 4.GB
+    max_cpus                    = 12
+    max_memory                  = 200.GB
     max_time                    = 1.days
 }
 
+includeConfig './conf/manifest.config'
 includeConfig './conf/modules.config'
 includeConfig './conf/reporting_defaults.config'
\ No newline at end of file
diff --git a/pan_gene_pfr.sh b/pan_gene_pfr.sh
index a1b1ced..c45623e 100644
--- a/pan_gene_pfr.sh
+++ b/pan_gene_pfr.sh
@@ -5,7 +5,7 @@
 #SBATCH --time=1-00:00:00
 #SBATCH --nodes=1
 #SBATCH --ntasks=1
-#SBATCH --cpus-per-task=2
+#SBATCH --cpus-per-task=1
 #SBATCH --output pan_gene_pfr.stdout
 #SBATCH --error pan_gene_pfr.stderr
 #SBATCH --mem=4G
diff --git a/subworkflows/local/fasta_liftoff.nf b/subworkflows/local/fasta_liftoff.nf
new file mode 100644
index 0000000..9a3cba8
--- /dev/null
+++ b/subworkflows/local/fasta_liftoff.nf
@@ -0,0 +1,79 @@
+include { GUNZIP as GUNZIP_FASTA    } from '../../modules/nf-core/gunzip'
+include { GUNZIP as GUNZIP_GFF      } from '../../modules/nf-core/gunzip'
+include { LIFTOFF                   } from '../../modules/local/liftoff'
+
+
+workflow FASTA_LIFTOFF {
+    take:
+    target_assemby              // Channel: [ meta, fasta ]
+    xref_annotations_fasta      // Channel: [ meta2, fasta ]
+    xref_annotations_gff        // Channel: [ meta2, gff3 ]
+    
+    main:
+    // MODULE: GUNZIP_FASTA
+    xref_annotations_fasta
+    | branch { meta, file ->
+        gz: "$file".endsWith(".gz")
+        rest: !"$file".endsWith(".gz")
+    }
+    | set { xref_annotations_fasta_branch }
+
+    GUNZIP_FASTA(
+        xref_annotations_fasta_branch.gz
+    )
+    .gunzip
+    | mix(
+        xref_annotations_fasta_branch.rest
+    )
+    | set { ch_xref_annotations_gunzip_fasta }
+
+    // MODULE: GUNZIP_GFF
+    xref_annotations_gff
+    | branch { meta, file ->
+        gz: "$file".endsWith(".gz")
+        rest: !"$file".endsWith(".gz")
+    }
+    | set { xref_annotations_gff_branch }
+
+    GUNZIP_GFF(
+        xref_annotations_gff_branch.gz
+    )
+    .gunzip
+    | mix(
+        xref_annotations_gff_branch.rest
+    )
+    | set { ch_xref_annotations_gunzip_gff }
+
+    // MODULE: LIFTOFF
+    target_assemby
+    | combine(
+        ch_xref_annotations_gunzip_fasta
+        | join(
+            ch_xref_annotations_gunzip_gff
+        )
+    )
+    | map { meta, targetFasta, refMeta, refFasta, refGFF  ->
+        [[id:"${meta.id}.from.${refMeta.id}", target_assemby: meta.id], targetFasta, refFasta, refGFF]
+    }
+    | set { ch_liftoff_inputs }
+
+    LIFTOFF(
+        ch_liftoff_inputs.map { meta, targetFasta, refFasta, refGFF -> [meta, targetFasta] },
+        ch_liftoff_inputs.map { meta, targetFasta, refFasta, refGFF -> refFasta },
+        ch_liftoff_inputs.map { meta, targetFasta, refFasta, refGFF -> refGFF }
+    )
+    .gff3
+    | map { meta, gff -> [[id: meta.target_assemby], gff] }
+    | groupTuple
+    | set { ch_liftoff_gff3 }
+
+    Channel.empty()
+    | mix(GUNZIP_FASTA.out.versions.first())
+    | mix(GUNZIP_GFF.out.versions.first())
+    | mix(LIFTOFF.out.versions.first())
+    | set { ch_versions }
+
+    emit:
+    gff3        = ch_liftoff_gff3               // [ meta, [ gff3 ] ]
+    versions    = ch_versions                   // [ versions.yml ]
+}
\ No newline at end of file
diff --git a/workflows/pan_gene.nf b/workflows/pan_gene.nf
index acfc77a..11699f5 100644
--- a/workflows/pan_gene.nf
+++ b/workflows/pan_gene.nf
@@ -1,14 +1,15 @@
-nextflow.enable.dsl=2
+include { validateParams                } from '../modules/local/validate_params'
 
-include { BRAKER3                               } from '../modules/kherronism/braker3'
-include { GUNZIP as GUNZIP_XREF_FASTA           } from '../modules/nf-core/gunzip'
-include { GUNZIP as GUNZIP_XREF_GFF             } from '../modules/nf-core/gunzip'
-include { validateParams                        } from '../modules/local/validate_params'
+include { PREPARE_ASSEMBLY              } from '../subworkflows/local/prepare_assembly'
+include { PREPROCESS_RNASEQ             } from '../subworkflows/local/preprocess_rnaseq'
+include { ALIGN_RNASEQ                  } from '../subworkflows/local/align_rnaseq'
+include { PREPARE_EXT_PROTS             } from '../subworkflows/local/prepare_ext_prots'
 
-include { PREPARE_ASSEMBLY                      } from '../subworkflows/local/prepare_assembly'
-include { PREPROCESS_RNASEQ                     } from '../subworkflows/local/preprocess_rnaseq'
-include { ALIGN_RNASEQ                          } from '../subworkflows/local/align_rnaseq'
-include { PREPARE_EXT_PROTS                     } from '../subworkflows/local/prepare_ext_prots'
+include { BRAKER3                       } from '../modules/kherronism/braker3'
+
+include { FASTA_LIFTOFF                 } from '../subworkflows/local/fasta_liftoff'
+
+include { CUSTOM_DUMPSOFTWAREVERSIONS   } from '../modules/nf-core/custom/dumpsoftwareversions'
 
 validateParams(params)
 
@@ -39,17 +40,32 @@ workflow PAN_GENE {
                                 ? file(params.ribo_database_manifest, checkIfExists: true)
                                 : null
 
-    ch_sortmerna_fastas         = Channel.from(ch_ribo_db ? ch_ribo_db.readLines() : null)
+    ch_sortmerna_fastas         = ch_ribo_db
+                                ? Channel.fromList(ch_ribo_db.readLines()) // one rRNA fasta path per manifest line
                                 | map { row -> file(row, checkIfExists: true) }
                                 | collect
+                                : Channel.empty()
 
-    ch_ext_prot_fastas          = (params.external_protein_fastas
+    ch_ext_prot_fastas          = params.external_protein_fastas
                                 ? Channel.fromList(params.external_protein_fastas)
-                                : Channel.empty())
                                 | map { filePath ->
                                     def fileHandle = file(filePath, checkIfExists: true)
                                     [[id:fileHandle.getSimpleName()], fileHandle]
                                 }
+                                : Channel.empty()
+    
+    ch_xref_annotations_mm      = params.liftoff_xref_annotations
+                                ? Channel.fromList(params.liftoff_xref_annotations)
+                                | multiMap { fasta, gff ->
+                                    def fastaFile = file(fasta, checkIfExists:true)
+
+                                    fasta: [[id:fastaFile.getSimpleName()], fastaFile]
+                                    gff: [[id:fastaFile.getSimpleName()], file(gff, checkIfExists:true)]
+                                }
+                                : Channel.empty()
+    // Without xref annotations ch_xref_annotations_mm is a plain empty channel with no .fasta/.gff outputs
+    ch_xref_annotations_fasta   = params.liftoff_xref_annotations ? ch_xref_annotations_mm.fasta : Channel.empty()
+    ch_xref_annotations_gff     = params.liftoff_xref_annotations ? ch_xref_annotations_mm.gff : Channel.empty()
 
     // SUBWORKFLOW: PREPARE_ASSEMBLY
     PREPARE_ASSEMBLY(
@@ -120,64 +136,18 @@ workflow PAN_GENE {
     ch_braker_gff3              = BRAKER3.out.gff3
     ch_versions                 = ch_versions.mix(BRAKER3.out.versions.first())
 
-    // // MODULE: GUNZIP_XREF_FASTA
-    // ch_xref_annotations = Channel.empty()
-    // if(params.liftoff_xref_annotations) {
-    //     Channel.fromList(params.liftoff_xref_annotations)
-    //     | multiMap { fasta, gff ->
-    //         def fastaFile = file(fasta, checkIfExists:true)
-    //         def meta = [id:fastaFile.getSimpleName()]
-
-    //         fasta: [meta, fastaFile]
-    //         gff: [meta, file(gff, checkIfExists:true)]
-    //     }
-    //     | set { ch_xref_annotations }
-    // }
-
-    // ch_xref_annotations.fasta
-    // | branch { meta, file ->
-    //     gz: "$file".endsWith(".gz")
-    //     rest: !"$file".endsWith(".gz")
-    // }
-    // | set { ch_xref_annotations_branch }
-
-    // GUNZIP_XREF_FASTA(
-    //     ch_xref_annotations_branch.gz
-    // )
-    // .gunzip
-    // | mix(
-    //     ch_xref_annotations_branch.rest
-    // )
-    // | set { ch_xref_annotations_fasta }
-
-    // // MODULE: GUNZIP_XREF_GFF
-    // ch_xref_annotations.gff
-    // | branch { meta, file ->
-    //     gz: "$file".endsWith(".gz")
-    //     rest: !"$file".endsWith(".gz")
-    // }
-    // | set { ch_xref_annotations_gff_branch }
-
-    // GUNZIP_XREF_GFF(
-    //     ch_xref_annotations_gff_branch.gff.map { meta, fasta, gff -> [meta, gff] }
-    // )
-    // .gunzip
-    // | mix(
-    //     ch_xref_annotations_gff_branch.rest.map { meta, fasta, gff -> [meta, gff] }
-    // )
-    // | set { ch_xref_annotations_gff }
-
-    // ch_xref_annotations_fasta
-    // | join(
-    //     ch_xref_annotations_gff
-    // )
-    // | set { ch_xref_annotations }
-
-    // // MODULE: LIFTOFF
-    // ch_xref_annotations
-    // | combine(
-    //     ch_validated_target_assemblies
-    // )
-    // | map { meta, ref_fasta, refGFF, targetMeta, targetFasta -> [[id:"${targetMeta.id}.from.${meta.id}"], ref_fasta, refGFF, targetFasta] }
-    // | set { ch_liftoff_inputs }
+    // SUBWORKFLOW: FASTA_LIFTOFF
+    FASTA_LIFTOFF(
+        ch_valid_target_assembly,
+        ch_xref_annotations_fasta,
+        ch_xref_annotations_gff
+    )
+
+    ch_liftoff_gff3             = FASTA_LIFTOFF.out.gff3
+    ch_versions                 = ch_versions.mix(FASTA_LIFTOFF.out.versions)
+
+    // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS
+    CUSTOM_DUMPSOFTWAREVERSIONS (
+        ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    )
 }
\ No newline at end of file

From f10ae9425689924f1133e5ed957858351a2a9fe4 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 14 Nov 2023 10:24:42 +1300
Subject: [PATCH 13/59] Added polished out channel to liftoff

---
 modules/local/liftoff/main.nf | 11 ++++++++---
 workflows/pan_gene.nf         |  2 +-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/modules/local/liftoff/main.nf b/modules/local/liftoff/main.nf
index 5bfb6f2..e10374d 100644
--- a/modules/local/liftoff/main.nf
+++ b/modules/local/liftoff/main.nf
@@ -10,9 +10,10 @@ process LIFTOFF {
     path ref_gff
     
     output:
-    tuple val(meta), path("*.gff3")                 , emit: gff3
-    tuple val(meta), path("*.unmapped.txt")         , emit: unmapped
-    path "versions.yml"                             , emit: versions
+    tuple val(meta), path("*.gff3")             , emit: gff3
+    tuple val(meta), path("*.polished.gff3")    , emit: polished_gff3, optional: true
+    tuple val(meta), path("*.unmapped.txt")     , emit: unmapped_txt
+    path "versions.yml"                         , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -31,6 +32,10 @@ process LIFTOFF {
     $ref_fa \\
     2> liftoff.stderr
 
+    [ -f "${prefix}.gff3_polished" ] \\
+    && mv "${prefix}.gff3_polished" "${prefix}.polished.gff3" \\
+    || echo "-polish is absent"
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         liftoff: \$(liftoff --version 2> /dev/null)
diff --git a/workflows/pan_gene.nf b/workflows/pan_gene.nf
index 11699f5..c442354 100644
--- a/workflows/pan_gene.nf
+++ b/workflows/pan_gene.nf
@@ -143,7 +143,7 @@ workflow PAN_GENE {
         ch_xref_annotations_gff
     )
 
-    ch_liftoff_gff3             = FASTA_LIFTOFF.out.gff3
+    ch_liftoff_gff3             = FASTA_LIFTOFF.out.polished_gff3
     ch_versions                 = ch_versions.mix(FASTA_LIFTOFF.out.versions)
 
     // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS

From 80db270eae1073dabac28a34ffbb78bd2e5965ac Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 14 Nov 2023 10:38:56 +1300
Subject: [PATCH 14/59] Added gffread before liftoff

---
 conf/modules.config                           |  4 +++
 modules/nf-core/CHANGELOG.md                  |  8 ++++-
 modules/nf-core/gffread/environment.yml       |  6 ++++
 modules/nf-core/gffread/main.nf               | 35 +++++++++++++++++++
 modules/nf-core/gffread/meta.yml              | 33 +++++++++++++++++
 modules/nf-core/gffread/tests/main.nf.test    | 32 +++++++++++++++++
 .../nf-core/gffread/tests/main.nf.test.snap   | 21 +++++++++++
 modules/nf-core/gffread/tests/tags.yml        |  2 ++
 subworkflows/local/fasta_liftoff.nf           | 12 +++++--
 9 files changed, 150 insertions(+), 3 deletions(-)
 create mode 100644 modules/nf-core/gffread/environment.yml
 create mode 100644 modules/nf-core/gffread/main.nf
 create mode 100644 modules/nf-core/gffread/meta.yml
 create mode 100644 modules/nf-core/gffread/tests/main.nf.test
 create mode 100644 modules/nf-core/gffread/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/gffread/tests/tags.yml

diff --git a/conf/modules.config b/conf/modules.config
index 8c99be5..5448813 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -142,6 +142,10 @@ if(params.liftoff_xref_annotations) {
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
             ]
         }
+
+        withName: GFFREAD {
+            ext.args = '--no-pseudo --keep-genes'
+        }
     }
 }
 
diff --git a/modules/nf-core/CHANGELOG.md b/modules/nf-core/CHANGELOG.md
index 280bc90..b2b47c7 100644
--- a/modules/nf-core/CHANGELOG.md
+++ b/modules/nf-core/CHANGELOG.md
@@ -25,4 +25,10 @@
 1. Added stub
 2. Added author in meta.yml
 
-- Repo: https://github.com/nf-core/modules/tree/71dbe24bee9ad6c013d4dd400d92612f6bf01ab8
\ No newline at end of file
+- Repo: https://github.com/nf-core/modules/tree/71dbe24bee9ad6c013d4dd400d92612f6bf01ab8
+
+### gffread
+
+1. Added gff3 channel
+2. Made output channels optional
+3. Added author in meta.yml
\ No newline at end of file
diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml
new file mode 100644
index 0000000..d127cae
--- /dev/null
+++ b/modules/nf-core/gffread/environment.yml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::gffread=0.12.1
diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf
new file mode 100644
index 0000000..d1477ab
--- /dev/null
+++ b/modules/nf-core/gffread/main.nf
@@ -0,0 +1,35 @@
+process GFFREAD {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' :
+        'biocontainers/gffread:0.12.1--h8b12597_0' }"
+
+    input:
+    tuple val(meta), path(gff)
+
+    output:
+    tuple val(meta), path("*.gtf")  , emit: gtf, optional: true
+    tuple val(meta), path("*.gff3") , emit: gff, optional: true
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args        = task.ext.args         ?: ''
+    def prefix      = task.ext.prefix       ?: "${gff.baseName}"
+    def extension   = args.contains("-T")   ?  '.gtf' : '.gff3'
+    """
+    gffread \\
+        $gff \\
+        $args \\
+        -o ${prefix}.${extension}
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gffread: \$(gffread --version 2>&1)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml
new file mode 100644
index 0000000..8a09a20
--- /dev/null
+++ b/modules/nf-core/gffread/meta.yml
@@ -0,0 +1,33 @@
+name: gffread
+description: Validate, filter, convert and perform various other operations on GFF files
+keywords:
+  - gff
+  - conversion
+  - validation
+tools:
+  - gffread:
+      description: GFF/GTF utility providing format conversions, region filtering, FASTA sequence extraction and more.
+      homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
+      documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
+      tool_dev_url: https://github.com/gpertea/gffread
+      doi: 10.12688/f1000research.23297.1
+      licence: ["MIT"]
+input:
+  - gff:
+      type: file
+      description: A reference file in either the GFF3, GFF2 or GTF format.
+      pattern: "*.{gff, gtf}"
+output:
+  - gtf:
+      type: file
+      description: GTF file resulting from the conversion of the GFF input file
+      pattern: "*.{gtf}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@emiller88"
+  - "@gallvp"
+maintainers:
+  - "@emiller88"
diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test
new file mode 100644
index 0000000..67d47ec
--- /dev/null
+++ b/modules/nf-core/gffread/tests/main.nf.test
@@ -0,0 +1,32 @@
+nextflow_process {
+
+    name "Test Process GFFREAD"
+    script "../main.nf"
+    process "GFFREAD"
+    tag "gffread"
+    tag "modules_nfcore"
+    tag "modules"
+
+    test("Should run without failures") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+                """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+            { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap
new file mode 100644
index 0000000..fb5460c
--- /dev/null
+++ b/modules/nf-core/gffread/tests/main.nf.test.snap
@@ -0,0 +1,21 @@
+{
+    "Should run without failures": {
+        "content": [
+            {
+                "0": [
+                    "genome.gtf:md5,f184f856b7fe3e159d21b052b5dd3954"
+                ],
+                "1": [
+                    "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+                ],
+                "gtf": [
+                    "genome.gtf:md5,f184f856b7fe3e159d21b052b5dd3954"
+                ],
+                "versions": [
+                    "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+                ]
+            }
+        ],
+        "timestamp": "2023-10-17T10:00:08.542490523"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gffread/tests/tags.yml b/modules/nf-core/gffread/tests/tags.yml
new file mode 100644
index 0000000..0557606
--- /dev/null
+++ b/modules/nf-core/gffread/tests/tags.yml
@@ -0,0 +1,2 @@
+gffread:
+  - modules/nf-core/gffread/**
diff --git a/subworkflows/local/fasta_liftoff.nf b/subworkflows/local/fasta_liftoff.nf
index 9a3cba8..8ac820e 100644
--- a/subworkflows/local/fasta_liftoff.nf
+++ b/subworkflows/local/fasta_liftoff.nf
@@ -1,8 +1,8 @@
 include { GUNZIP as GUNZIP_FASTA    } from '../../modules/nf-core/gunzip'
 include { GUNZIP as GUNZIP_GFF      } from '../../modules/nf-core/gunzip'
+include { GFFREAD                   } from '../../modules/nf-core/gffread'
 include { LIFTOFF                   } from '../../modules/local/liftoff'
 
-
 workflow FASTA_LIFTOFF {
     take:
     target_assemby              // Channel: [ meta, fasta ]
@@ -44,12 +44,19 @@ workflow FASTA_LIFTOFF {
     )
     | set { ch_xref_annotations_gunzip_gff }
 
+    // MODULE: GFFREAD
+    GFFREAD(
+        ch_xref_annotations_gunzip_gff
+    )
+    .gff
+    | set { ch_gffread_gff }
+
     // MODULE: LIFTOFF
     target_assemby
     | combine(
         ch_xref_annotations_gunzip_fasta
         | join(
-            ch_xref_annotations_gunzip_gff
+            ch_gffread_gff
         )
     )
     | map { meta, targetFasta, refMeta, refFasta, refGFF  ->
@@ -70,6 +77,7 @@ workflow FASTA_LIFTOFF {
     Channel.empty()
     | mix(GUNZIP_FASTA.out.versions.first())
     | mix(GUNZIP_GFF.out.versions.first())
+    | mix(GFFREAD.out.versions.first())
     | mix(LIFTOFF.out.versions.first())
     | set { ch_versions }
 

From bb9b8b06973c19a34c98f7ce59deaffa942f6d1d Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 14 Nov 2023 12:00:47 +1300
Subject: [PATCH 15/59] Updated flowchart

---
 README.md | 124 +++++++++++++++++++++++-------------------------------
 1 file changed, 52 insertions(+), 72 deletions(-)

diff --git a/README.md b/README.md
index 7dbf45e..237657f 100644
--- a/README.md
+++ b/README.md
@@ -5,90 +5,70 @@ A NextFlow pipeline for pan-genome annotation.
 
 ```mermaid
 flowchart TD
-    ribo_db((ribo_db))
-    SAMPLESHEET((samples))
-    TE_LIBRARIES(("[te_libs]"))
-    TARGET_ASSEMBLIES(("[assemblies]"))
-    EXTERNAL_PROTEIN_SEQS(("[ext_prots]"))
-    
-    GUNZIP_PROT[GUNZIP]
-    GUNZIP_TE[GUNZIP]
-    SKIP_EDTA{Skip EDTA}
-    pend((dev))
-    
-    TE_LIBRARIES --> GUNZIP_TE
-    GUNZIP_TE --> SKIP_EDTA
-    
-    TARGET_ASSEMBLIES --> GUNZIP
-    GUNZIP --> FASTA_VALIDATE
-    FASTA_VALIDATE --> FASTA_PERFORM_EDTA
-    FASTA_VALIDATE --> SKIP_EDTA
-    
-    SKIP_EDTA --> REPEATMASKER
-    FASTA_PERFORM_EDTA --> REPEATMASKER
-    REPEATMASKER --> STAR_GENOMEGENERATE
-
-    SAMPLESHEET --> SAMPLESHEET_CHECK
-    SAMPLESHEET_CHECK --> |Technical replicates|CAT_FASTQ
-    CAT_FASTQ --> FASTQC
-    SAMPLESHEET_CHECK --> FASTQC
-    FASTQC --> FASTP
-    
-    ribo_db --> SORTMERNA
-    FASTP --> SORTMERNA
-    SORTMERNA --> STAR_ALIGN
-    STAR_GENOMEGENERATE --> STAR_ALIGN
-    STAR_ALIGN --> GROUP_BY_ASSEMBLY([Group by assembly])
-    GROUP_BY_ASSEMBLY --> SAMTOOLS_CAT
-    SAMTOOLS_CAT --> |RNASeq bam|BRAKER3
-
-    REPEATMASKER --> BRAKER3
-
-    EXTERNAL_PROTEIN_SEQS --> GUNZIP_PROT
-    GUNZIP_PROT --> CAT
-    CAT --> BRAKER3
-    
-    BRAKER3 --> pend
-
-    subgraph Params
+    subgraph PrepareAssembly [ ]
     TARGET_ASSEMBLIES
     TE_LIBRARIES
-    SAMPLESHEET
-    ribo_db
-    EXTERNAL_PROTEIN_SEQS
-    end
-
-    subgraph GenomePrep
-    GUNZIP
     FASTA_VALIDATE
-    GUNZIP_TE
-    FASTA_PERFORM_EDTA
-    SKIP_EDTA
+    EDTA
     REPEATMASKER
-    STAR_GENOMEGENERATE
     end
-
-    subgraph Braker
-    CAT
-    GUNZIP_PROT
-    BRAKER3
-    end
-
-    subgraph SamplePrep
-    SAMPLESHEET_CHECK
+    
+    TARGET_ASSEMBLIES(["[target_assemblies]"])
+    TE_LIBRARIES(["[te_libs]"])
+    TARGET_ASSEMBLIES --> FASTA_VALIDATE
+    FASTA_VALIDATE --> EDTA
+    TE_LIBRARIES --> REPEATMASKER
+    EDTA --> |te_lib absent|REPEATMASKER
+
+    subgraph Samplesheet [ ]
+    SAMPLESHEET
     CAT_FASTQ
     FASTQC
     FASTP
+    FASTP_FASTQC
     SORTMERNA
-    STAR_ALIGN
-    GROUP_BY_ASSEMBLY
+    STAR
     SAMTOOLS_CAT
     end
+    
+    SAMPLESHEET([samplesheet])
+    SAMPLESHEET --> |Tech. reps|CAT_FASTQ
+    CAT_FASTQ --> FASTQC
+    SAMPLESHEET --> FASTQC
+    FASTQC --> FASTP
+    FASTP --> FASTP_FASTQC[FASTQC]
+    FASTP_FASTQC --> SORTMERNA
+    SORTMERNA --> STAR
+    STAR --> SAMTOOLS_CAT
+
+    subgraph Annotation [ ]
+    anno_fasta(( ))
+    anno_masked_fasta(( ))
+    anno_bam(( ))
+    EXTERNAL_PROTEIN_SEQS(["[ext_prots]"])
+    XREF_ANNOTATIONS(["[xref_annotations]"])
+    CAT
+    BRAKER3
+    GFFREAD
+    LIFTOFF
+    end
+
+    PrepareAssembly --> |Fasta, Masked fasta|Annotation
+    Samplesheet --> |RNASeq bam|Annotation
+
+    XREF_ANNOTATIONS --> |xref_gff|GFFREAD
+    XREF_ANNOTATIONS --> |xref_fasta|LIFTOFF
+    GFFREAD --> LIFTOFF
+    anno_fasta --> |Fasta|LIFTOFF
+    
+    EXTERNAL_PROTEIN_SEQS --> CAT
+    anno_masked_fasta --> |Masked fasta|BRAKER3
+    anno_bam --> |RNASeq bam|BRAKER3
+    CAT --> BRAKER3
 
-    style Params fill:#00FFFF21,stroke:#00FFFF21
-    style GenomePrep fill:#00FFFF21,stroke:#00FFFF21
-    style SamplePrep fill:#00FFFF21,stroke:#00FFFF21
-    style Braker fill:#00FFFF21,stroke:#00FFFF21
+    style Samplesheet fill:#00FFFF21,stroke:#00FFFF21
+    style PrepareAssembly fill:#00FFFF21,stroke:#00FFFF21
+    style Annotation fill:#00FFFF21,stroke:#00FFFF21
 ```
 
 ## Plant&Food Users

From c982946527e4776eb60504e8a126ce5605fa5f26 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 16 Nov 2023 08:13:09 +1300
Subject: [PATCH 16/59] Added liftoff options

---
 conf/modules.config                 | 9 ++++++++-
 nextflow.config                     | 3 +++
 subworkflows/local/fasta_liftoff.nf | 2 +-
 workflows/pan_gene.nf               | 2 +-
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 5448813..58830e9 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -135,7 +135,14 @@ process {
 if(params.liftoff_xref_annotations) {
     process {
         withName: LIFTOFF {
-            ext.args = '-exclude_partial -copies -polish'
+            ext.args = '    '
+            ext.args = [
+                '-exclude_partial',
+                '-copies',
+                '-polish',
+                "-a $params.liftoff_coverage",
+                "-s $params.liftoff_identity"
+            ].join(' ').trim()
             publishDir = [
                 path: { "${params.outdir}/liftoff/${meta.id}" },
                 mode: "copy",
diff --git a/nextflow.config b/nextflow.config
index 574bf39..6180e35 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -65,6 +65,9 @@ params {
     // Format:      [ [ fasta(.gz), gff3(.gz) ] ]
     // Optional:    Set to null if not available
 
+    liftoff_coverage            = 0.9
+    liftoff_identity            = 0.9
+
     outdir                      = "./results"
 
     max_cpus                    = 12
diff --git a/subworkflows/local/fasta_liftoff.nf b/subworkflows/local/fasta_liftoff.nf
index 8ac820e..5e6fd22 100644
--- a/subworkflows/local/fasta_liftoff.nf
+++ b/subworkflows/local/fasta_liftoff.nf
@@ -69,7 +69,7 @@ workflow FASTA_LIFTOFF {
         ch_liftoff_inputs.map { meta, targetFasta, refFasta, refGFF -> refFasta },
         ch_liftoff_inputs.map { meta, targetFasta, refFasta, refGFF -> refGFF }
     )
-    .gff3
+    .polished_gff3
     | map { meta, gff -> [[id: meta.target_assemby], gff] }
     | groupTuple
     | set { ch_liftoff_gff3 }
diff --git a/workflows/pan_gene.nf b/workflows/pan_gene.nf
index c442354..11699f5 100644
--- a/workflows/pan_gene.nf
+++ b/workflows/pan_gene.nf
@@ -143,7 +143,7 @@ workflow PAN_GENE {
         ch_xref_annotations_gff
     )
 
-    ch_liftoff_gff3             = FASTA_LIFTOFF.out.polished_gff3
+    ch_liftoff_gff3             = FASTA_LIFTOFF.out.gff3
     ch_versions                 = ch_versions.mix(FASTA_LIFTOFF.out.versions)
 
     // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS

From c8ff8dac9f38702a41782f5893eb0466d70ee245 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 21 Nov 2023 15:14:49 +1300
Subject: [PATCH 17/59] Started moving to nf-core/tools

---
 .gitignore                            |  9 ++++++---
 README.md                             |  2 +-
 TODO.md                               |  6 +++++-
 conf/base.config                      |  4 +++-
 main.nf                               |  8 ++++++--
 modules/kherronism/braker3/main.nf    |  2 --
 modules/local/edta/edta/main.nf       |  1 -
 nextflow.config                       |  6 +++---
 pan_gene_pfr.sh                       | 18 ------------------
 pangene_pfr.sh                        | 18 ++++++++++++++++++
 workflows/{pan_gene.nf => pangene.nf} |  2 +-
 11 files changed, 43 insertions(+), 33 deletions(-)
 delete mode 100644 pan_gene_pfr.sh
 create mode 100644 pangene_pfr.sh
 rename workflows/{pan_gene.nf => pangene.nf} (99%)

diff --git a/.gitignore b/.gitignore
index 6e9d9d4..8f984b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,12 @@
-.DS_Store
-*.pyc
-__pycahce__
 .nextflow*
 work/
+data/
 results/
+.DS_Store
+testing/
+testing*
+*.pyc
+
 *.stdout
 *.stderr
 
diff --git a/README.md b/README.md
index 237657f..ea8b609 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# PAN-GENE
+# PANGENE
 A NextFlow pipeline for pan-genome annotation.
 
 ## Pipeline Flowchart
diff --git a/TODO.md b/TODO.md
index 6e1e66c..8c90b99 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1 +1,5 @@
-- [ ] Add --eval=reference.gtf
\ No newline at end of file
+- [ ] Add --eval=reference.gtf
+- [ ] From Ross regarding post-processing:
+
+> [9:49 am] Ross Crowhurst
+Here is an easy one: BLASTp vs swissprot & Arabidopsis and check query is within set thresholds of reference - if so accept; If not move to BLASTp vs Uniref90 or RefSeq (or some other predetermined model species) - same deal accept if within threshold limits. Else BLASTn of cds vs NCBI nt (really scraping the bottom of the barrel here). If not a hit to anything then chances are it's garbage and should be removed. Some ppl might try to claim it's a unique protein to the genotype but in 20 years I have never seen one of those be supported - mostly this category is garbage. The screen against NCBI nt also assists to classify "bits" as well as retroposons etc. Idea being you want to remove garbage predictions - as this does take time you can see why some papers just filter out by size.
\ No newline at end of file
diff --git a/conf/base.config b/conf/base.config
index 4cdec8d..54db554 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -61,9 +61,11 @@ process {
     }
 }
 
-singularity {
+apptainer {
     enabled                 = true
     autoMounts              = true
+    envWhitelist            = "APPTAINER_BINDPATH,APPTAINER_BIND"
+    registry                = 'quay.io'
 }
 
 nextflow {
diff --git a/main.nf b/main.nf
index c8a54e2..7fe5247 100755
--- a/main.nf
+++ b/main.nf
@@ -2,8 +2,12 @@
 
 nextflow.enable.dsl=2
 
-include { PAN_GENE } from './workflows/pan_gene.nf'
+include { PANGENE } from './workflows/pangene.nf'
 
 workflow {
-    PAN_GENE()
+    PFR_PANGENE()
+}
+
+workflow PFR_PANGENE {
+    PANGENE()
 }
\ No newline at end of file
diff --git a/modules/kherronism/braker3/main.nf b/modules/kherronism/braker3/main.nf
index 82fa096..c9d915a 100644
--- a/modules/kherronism/braker3/main.nf
+++ b/modules/kherronism/braker3/main.nf
@@ -6,8 +6,6 @@ process BRAKER3 {
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'registry.hub.docker.com/teambraker/braker3:v.1.0.3':
         'registry.hub.docker.com/teambraker/braker3:v.1.0.3' }"
-    
-    containerOptions "-B $TMPDIR:$TMPDIR"
 
     input:
     tuple val(meta), path(fasta)
diff --git a/modules/local/edta/edta/main.nf b/modules/local/edta/edta/main.nf
index 2e6d759..9c9b180 100644
--- a/modules/local/edta/edta/main.nf
+++ b/modules/local/edta/edta/main.nf
@@ -4,7 +4,6 @@ process EDTA {
     label "process_week_long"
     
     container 'https://depot.galaxyproject.org/singularity/edta:2.1.0--hdfd78af_1'
-    containerOptions "-B $TMPDIR:$TMPDIR"
 
     input:
     tuple val(meta), path(fasta_file)
diff --git a/nextflow.config b/nextflow.config
index 6180e35..135bf29 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -2,8 +2,8 @@ includeConfig './conf/base.config'
 
 params {
     target_assemblies           = [
-        ["red5_v2p1", "/workspace/hrauxr/pan-gene/.test/red5_v2p1_chr1.fasta"],
-        ["donghong", "/workspace/hrauxr/pan-gene/.test/donghong.chr1.fsa.gz"]
+        ["red5_v2p1", "/workspace/hrauxr/pangene/.test/red5_v2p1_chr1.fasta"],
+        ["donghong", "/workspace/hrauxr/pangene/.test/donghong.chr1.fsa.gz"]
     ]
     // Pattern:             [ [tag, fasta(.gz) ] ]
     // Permissible tags:    tag, tag_1, tag_tag2_3, tag_tag2_tag3;
@@ -11,7 +11,7 @@ params {
     //                      "." is not allowed in the tag name
     
     te_libraries                = [
-        ["donghong", "/workspace/hrauxr/pan-gene/.test/donghong.TElib.fa.gz"]
+        ["donghong", "/workspace/hrauxr/pangene/.test/donghong.TElib.fa.gz"]
     ]
     // Pattern:             [ [tag, fasta(.gz) ] ]
     // Optional             Set to null if libraries are not available.
diff --git a/pan_gene_pfr.sh b/pan_gene_pfr.sh
deleted file mode 100644
index c45623e..0000000
--- a/pan_gene_pfr.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash -e
-
-
-#SBATCH --job-name PAN_GENE
-#SBATCH --time=1-00:00:00
-#SBATCH --nodes=1
-#SBATCH --ntasks=1
-#SBATCH --cpus-per-task=1
-#SBATCH --output pan_gene_pfr.stdout
-#SBATCH --error pan_gene_pfr.stderr
-#SBATCH --mem=4G
-
-ml apptainer/1.1
-ml nextflow/22.10.4
-
-export TMPDIR="/workspace/$USER/tmp"
-
-nextflow main.nf -profile slurm -resume
\ No newline at end of file
diff --git a/pangene_pfr.sh b/pangene_pfr.sh
new file mode 100644
index 0000000..785199e
--- /dev/null
+++ b/pangene_pfr.sh
@@ -0,0 +1,18 @@
+#!/bin/bash -e
+
+
+#SBATCH --job-name PANGENE
+#SBATCH --time=1-00:00:00
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=1
+#SBATCH --output pangene_pfr.stdout
+#SBATCH --error pangene_pfr.stderr
+#SBATCH --mem=4G
+
+ml apptainer/1.1
+ml nextflow/23.04.4
+
+export APPTAINER_BINDPATH="$APPTAINER_BINDPATH,/workspace/$USER/tmp:/tmp"
+
+nextflow main.nf -profile slurm -resume
\ No newline at end of file
diff --git a/workflows/pan_gene.nf b/workflows/pangene.nf
similarity index 99%
rename from workflows/pan_gene.nf
rename to workflows/pangene.nf
index 11699f5..9110688 100644
--- a/workflows/pan_gene.nf
+++ b/workflows/pangene.nf
@@ -13,7 +13,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS   } from '../modules/nf-core/custom/dumpso
 
 validateParams(params)
 
-workflow PAN_GENE {
+workflow PANGENE {
 
     ch_versions                 = Channel.empty()
 

From f28546183bb88402cf5c62f00e2f549f16490b6c Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 22 Nov 2023 12:51:01 +1300
Subject: [PATCH 18/59] Reimported modules using nf-core/tools

---
 .nf-core.yml                                  |   1 +
 modules.json                                  | 108 ++++
 modules/nf-core/CHANGELOG.md                  |  34 --
 modules/nf-core/LICENSE                       |  21 -
 modules/nf-core/cat/cat/environment.yml       |   7 +
 modules/nf-core/cat/cat/main.nf               |   2 +-
 modules/nf-core/cat/cat/meta.yml              |   7 +-
 modules/nf-core/cat/cat/tests/main.nf.test    | 153 ++++++
 .../nf-core/cat/cat/tests/main.nf.test.snap   | 121 +++++
 .../cat/tests/nextflow_unzipped_zipped.config |   6 +
 .../cat/tests/nextflow_zipped_unzipped.config |   8 +
 modules/nf-core/cat/cat/tests/tags.yml        |   2 +
 modules/nf-core/cat/fastq/environment.yml     |   7 +
 modules/nf-core/cat/fastq/main.nf             |   2 +-
 modules/nf-core/cat/fastq/meta.yml            |   4 +-
 modules/nf-core/cat/fastq/tests/main.nf.test  | 143 ++++++
 .../nf-core/cat/fastq/tests/main.nf.test.snap |  78 +++
 modules/nf-core/cat/fastq/tests/tags.yml      |   2 +
 .../dumpsoftwareversions/environment.yml      |   1 +
 .../custom/dumpsoftwareversions/meta.yml      |   2 +-
 modules/nf-core/fastp/environment.yml         |   7 +
 modules/nf-core/fastp/main.nf                 |  20 +-
 modules/nf-core/fastp/meta.yml                |   5 +-
 modules/nf-core/fastp/tests/main.nf.test      | 485 ++++++++++++++++++
 modules/nf-core/fastp/tests/main.nf.test.snap |  52 ++
 modules/nf-core/fastp/tests/nextflow.config   |   6 +
 modules/nf-core/fastp/tests/tags.yml          |   2 +
 modules/nf-core/fastqc/environment.yml        |   7 +
 modules/nf-core/fastqc/main.nf                |   6 +-
 modules/nf-core/fastqc/meta.yml               |   5 +
 modules/nf-core/fastqc/tests/main.nf.test     |  23 +-
 .../nf-core/fastqc/tests/main.nf.test.snap    |  10 +
 modules/nf-core/fastqc/tests/tags.yml         |   2 +
 modules/nf-core/gffread/environment.yml       |   1 +
 modules/nf-core/gffread/main.nf               |  16 +-
 modules/nf-core/gffread/meta.yml              |   1 -
 modules/nf-core/gunzip/environment.yml        |   7 +
 modules/nf-core/gunzip/main.nf                |   2 +-
 modules/nf-core/gunzip/meta.yml               |   4 +
 modules/nf-core/gunzip/tests/main.nf.test     |  35 ++
 .../nf-core/gunzip/tests/main.nf.test.snap    |  31 ++
 modules/nf-core/gunzip/tests/tags.yml         |   2 +
 modules/nf-core/samtools/cat/environment.yml  |   7 +
 modules/nf-core/samtools/cat/main.nf          |   2 +-
 modules/nf-core/samtools/cat/meta.yml         |   2 +
 modules/nf-core/sortmerna/environment.yml     |   7 +
 modules/nf-core/sortmerna/main.nf             |  28 +-
 modules/nf-core/sortmerna/meta.yml            |   4 +-
 modules/nf-core/star/align/environment.yml    |   9 +
 modules/nf-core/star/align/main.nf            |   2 +-
 modules/nf-core/star/align/meta.yml           |   6 +-
 .../star/genomegenerate/environment.yml       |   9 +
 modules/nf-core/star/genomegenerate/main.nf   |   8 +-
 modules/nf-core/star/genomegenerate/meta.yml  |   6 +-
 modules/nf-core/star/starsolo/main.nf         |  94 ----
 modules/nf-core/star/starsolo/meta.yml        |  79 ---
 modules/nf-core/trinity/main.nf               |  74 ---
 modules/nf-core/trinity/meta.yml              |  45 --
 modules/nf-core/umitools/dedup/main.nf        |  62 ---
 modules/nf-core/umitools/dedup/meta.yml       |  68 ---
 .../nf-core/umitools/extract/environment.yml  |   7 +
 modules/nf-core/umitools/extract/main.nf      |   2 +-
 modules/nf-core/umitools/extract/meta.yml     |  17 +-
 .../umitools/extract/tests/main.nf.test       |  35 ++
 .../umitools/extract/tests/main.nf.test.snap  |  10 +
 .../umitools/extract/tests/nextflow.config    |   9 +
 .../nf-core/umitools/extract/tests/tags.yml   |   2 +
 modules/nf-core/umitools/group/main.nf        |  62 ---
 modules/nf-core/umitools/group/meta.yml       |  62 ---
 .../fastq_fastqc_umitools_fastp/main.nf       |   3 +-
 .../fastq_fastqc_umitools_fastp/meta.yml      |   9 +-
 71 files changed, 1463 insertions(+), 705 deletions(-)
 create mode 100644 .nf-core.yml
 create mode 100644 modules.json
 delete mode 100644 modules/nf-core/CHANGELOG.md
 delete mode 100644 modules/nf-core/LICENSE
 create mode 100644 modules/nf-core/cat/cat/environment.yml
 create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test
 create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
 create mode 100644 modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
 create mode 100644 modules/nf-core/cat/cat/tests/tags.yml
 create mode 100644 modules/nf-core/cat/fastq/environment.yml
 create mode 100644 modules/nf-core/cat/fastq/tests/main.nf.test
 create mode 100644 modules/nf-core/cat/fastq/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/cat/fastq/tests/tags.yml
 create mode 100644 modules/nf-core/fastp/environment.yml
 create mode 100644 modules/nf-core/fastp/tests/main.nf.test
 create mode 100644 modules/nf-core/fastp/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/fastp/tests/nextflow.config
 create mode 100644 modules/nf-core/fastp/tests/tags.yml
 create mode 100644 modules/nf-core/fastqc/environment.yml
 create mode 100644 modules/nf-core/fastqc/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/fastqc/tests/tags.yml
 create mode 100644 modules/nf-core/gunzip/environment.yml
 create mode 100644 modules/nf-core/gunzip/tests/main.nf.test
 create mode 100644 modules/nf-core/gunzip/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/gunzip/tests/tags.yml
 create mode 100644 modules/nf-core/samtools/cat/environment.yml
 create mode 100644 modules/nf-core/sortmerna/environment.yml
 create mode 100644 modules/nf-core/star/align/environment.yml
 create mode 100644 modules/nf-core/star/genomegenerate/environment.yml
 delete mode 100644 modules/nf-core/star/starsolo/main.nf
 delete mode 100644 modules/nf-core/star/starsolo/meta.yml
 delete mode 100644 modules/nf-core/trinity/main.nf
 delete mode 100644 modules/nf-core/trinity/meta.yml
 delete mode 100644 modules/nf-core/umitools/dedup/main.nf
 delete mode 100644 modules/nf-core/umitools/dedup/meta.yml
 create mode 100644 modules/nf-core/umitools/extract/environment.yml
 create mode 100644 modules/nf-core/umitools/extract/tests/main.nf.test
 create mode 100644 modules/nf-core/umitools/extract/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/umitools/extract/tests/nextflow.config
 create mode 100644 modules/nf-core/umitools/extract/tests/tags.yml
 delete mode 100644 modules/nf-core/umitools/group/main.nf
 delete mode 100644 modules/nf-core/umitools/group/meta.yml

diff --git a/.nf-core.yml b/.nf-core.yml
new file mode 100644
index 0000000..b1a7f0e
--- /dev/null
+++ b/.nf-core.yml
@@ -0,0 +1 @@
+repository_type: pipeline
\ No newline at end of file
diff --git a/modules.json b/modules.json
new file mode 100644
index 0000000..14b25cd
--- /dev/null
+++ b/modules.json
@@ -0,0 +1,108 @@
+{
+    "name": "PlantandFoodResearch/pangene",
+    "homePage": "https://github.com/PlantandFoodResearch/pangene",
+    "repos": {
+        "https://github.com/nf-core/modules.git": {
+            "modules": {
+                "nf-core": {
+                    "cat/cat": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "cat/fastq": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "custom/dumpsoftwareversions": {
+                        "branch": "master",
+                        "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "fastp": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "fastq_fastqc_umitools_fastp"
+                        ]
+                    },
+                    "fastqc": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules",
+                            "fastq_fastqc_umitools_fastp"
+                        ]
+                    },
+                    "gffread": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "gunzip": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "samtools/cat": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "sortmerna": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "star/align": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "star/genomegenerate": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "umitools/extract": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "fastq_fastqc_umitools_fastp"
+                        ]
+                    }
+                }
+            },
+            "subworkflows": {
+                "nf-core": {
+                    "fastq_fastqc_umitools_fastp": {
+                        "branch": "master",
+                        "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
+                        "installed_by": [
+                            "subworkflows"
+                        ]
+                    }
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/CHANGELOG.md b/modules/nf-core/CHANGELOG.md
deleted file mode 100644
index b2b47c7..0000000
--- a/modules/nf-core/CHANGELOG.md
+++ /dev/null
@@ -1,34 +0,0 @@
-## Source
-
-- Repo: https://github.com/nf-core/modules/tree/3fbcafe2543dabcc7b2be0f3b24507002b3e4b0d
-- License: See LICENSE file
-
-## Changes
-
-### trinity
-
-1. Added stub
-2. Added author in meta.yml
-
-### fastp
-
-1. Added stub
-2. Added author in meta.yml
-
-### star/genomegenerate
-
-1. Added star_ignore_sjdbgtf
-2. Added author in meta.yml
-
-### sortmerna
-
-1. Added stub
-2. Added author in meta.yml
-
-- Repo: https://github.com/nf-core/modules/tree/71dbe24bee9ad6c013d4dd400d92612f6bf01ab8
-
-### gffread
-
-1. Added gff3 channel
-2. Made output channels optional
-3. Added author in meta.yml
\ No newline at end of file
diff --git a/modules/nf-core/LICENSE b/modules/nf-core/LICENSE
deleted file mode 100644
index d2e2384..0000000
--- a/modules/nf-core/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) Philip Ewels
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml
new file mode 100644
index 0000000..17a04ef
--- /dev/null
+++ b/modules/nf-core/cat/cat/environment.yml
@@ -0,0 +1,7 @@
+name: cat_cat
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::pigz=2.3.4
diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf
index 9f06221..4264a92 100644
--- a/modules/nf-core/cat/cat/main.nf
+++ b/modules/nf-core/cat/cat/main.nf
@@ -2,7 +2,7 @@ process CAT_CAT {
     tag "$meta.id"
     label 'process_low'
 
-    conda "conda-forge::pigz=2.3.4"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/pigz:2.3.4' :
         'biocontainers/pigz:2.3.4' }"
diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml
index 8acc0bf..00a8db0 100644
--- a/modules/nf-core/cat/cat/meta.yml
+++ b/modules/nf-core/cat/cat/meta.yml
@@ -7,9 +7,7 @@ keywords:
 tools:
   - cat:
       description: Just concatenation
-
       documentation: https://man7.org/linux/man-pages/man1/cat.1.html
-
       licence: ["GPL-3.0-or-later"]
 input:
   - meta:
@@ -21,7 +19,6 @@ input:
       type: file
       description: List of compressed / uncompressed files
       pattern: "*"
-
 output:
   - versions:
       type: file
@@ -31,7 +28,9 @@ output:
       type: file
       description: Concatenated file. Will be gzipped if file_out ends with ".gz"
       pattern: "${file_out}"
-
 authors:
   - "@erikrikarddaniel"
   - "@FriederikeHanssen"
+maintainers:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test
new file mode 100644
index 0000000..5766daa
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/main.nf.test
@@ -0,0 +1,153 @@
+nextflow_process {
+
+    name "Test Process CAT_CAT"
+    script "../main.nf"
+    process "CAT_CAT"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cat"
+    tag "cat/cat"
+
+    test("test_cat_unzipped_unzipped") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+
+    test("test_cat_zipped_zipped") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") },
+                { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")}
+            )
+        }
+    }
+
+    test("test_cat_zipped_unzipped") {
+        config './nextflow_zipped_unzipped.config'
+
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("test_cat_unzipped_zipped") {
+        config './nextflow_unzipped_zipped.config'
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") },
+                { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")}
+            )
+        }
+    }
+
+    test("test_cat_one_file_unzipped_zipped") {
+        config './nextflow_unzipped_zipped.config'
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") },
+                { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")}
+            )
+        }
+    }
+}
+
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap
new file mode 100644
index 0000000..423571b
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap
@@ -0,0 +1,121 @@
+{
+    "test_cat_unzipped_zipped_size": {
+        "content": [
+            375
+        ],
+        "timestamp": "2023-10-16T14:33:08.049445686"
+    },
+    "test_cat_unzipped_unzipped": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ],
+                "file_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ]
+            }
+        ],
+        "timestamp": "2023-10-16T14:32:18.500464399"
+    },
+    "test_cat_zipped_unzipped": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ],
+                "file_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ]
+            }
+        ],
+        "timestamp": "2023-10-16T14:32:49.642741302"
+    },
+    "test_cat_zipped_zipped_lines": {
+        "content": [
+            [
+                "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab",
+                "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1",
+                "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1",
+                "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1",
+                "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1",
+                "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1"
+            ]
+        ],
+        "timestamp": "2023-10-16T14:32:33.629048645"
+    },
+    "test_cat_unzipped_zipped_lines": {
+        "content": [
+            [
+                ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome",
+                "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT",
+                "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG",
+                "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG",
+                "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT",
+                "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG"
+            ]
+        ],
+        "timestamp": "2023-10-16T14:33:08.038830506"
+    },
+    "test_cat_one_file_unzipped_zipped_lines": {
+        "content": [
+            [
+                ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome",
+                "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT",
+                "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG",
+                "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG",
+                "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT",
+                "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG"
+            ]
+        ],
+        "timestamp": "2023-10-16T14:33:21.39642399"
+    },
+    "test_cat_zipped_zipped_size": {
+        "content": [
+            78
+        ],
+        "timestamp": "2023-10-16T14:32:33.641869244"
+    },
+    "test_cat_one_file_unzipped_zipped_size": {
+        "content": [
+            374
+        ],
+        "timestamp": "2023-10-16T14:33:21.4094373"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
new file mode 100644
index 0000000..ec26b0f
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
@@ -0,0 +1,6 @@
+
+process { // test-only process configuration (file: tests/nextflow_unzipped_zipped.config)
+    withName: CAT_CAT { // selector: the setting below applies only to the CAT_CAT process
+        ext.prefix = 'cat.txt.gz' // NOTE(review): presumably makes CAT_CAT emit gzipped output for the unzipped->zipped test — confirm against cat/cat main.nf
+    }
+}
diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
new file mode 100644
index 0000000..fbc7978
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
@@ -0,0 +1,8 @@
+
+process { // test-only process configuration (file: tests/nextflow_zipped_unzipped.config)
+
+    withName: CAT_CAT { // selector: the setting below applies only to the CAT_CAT process
+        ext.prefix = 'cat.txt' // NOTE(review): presumably makes CAT_CAT emit plain-text output for the zipped->unzipped test — confirm against cat/cat main.nf
+    }
+
+}
diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml
new file mode 100644
index 0000000..37b578f
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/tags.yml
@@ -0,0 +1,2 @@
+cat/cat:
+  - modules/nf-core/cat/cat/**
diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml
new file mode 100644
index 0000000..bff93ad
--- /dev/null
+++ b/modules/nf-core/cat/fastq/environment.yml
@@ -0,0 +1,7 @@
+name: cat_fastq
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::sed=4.7
diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf
index 5021e6f..3d96378 100644
--- a/modules/nf-core/cat/fastq/main.nf
+++ b/modules/nf-core/cat/fastq/main.nf
@@ -2,7 +2,7 @@ process CAT_FASTQ {
     tag "$meta.id"
     label 'process_single'
 
-    conda "conda-forge::sed=4.7"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
         'nf-core/ubuntu:20.04' }"
diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml
index 8a39e30..db4ac3c 100644
--- a/modules/nf-core/cat/fastq/meta.yml
+++ b/modules/nf-core/cat/fastq/meta.yml
@@ -34,7 +34,9 @@ output:
       type: file
       description: File containing software versions
       pattern: "versions.yml"
-
 authors:
   - "@joseespinosa"
   - "@drpatelh"
+maintainers:
+  - "@joseespinosa"
+  - "@drpatelh"
diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test
new file mode 100644
index 0000000..f5f9418
--- /dev/null
+++ b/modules/nf-core/cat/fastq/tests/main.nf.test
@@ -0,0 +1,143 @@
+nextflow_process { // nf-test suite for the CAT_FASTQ process loaded from ../main.nf
+
+    name "Test Process CAT_FASTQ"
+    script "../main.nf"
+    process "CAT_FASTQ"
+    tag "modules" // tags are used to filter which tests are run
+    tag "modules_nfcore"
+    tag "cat"
+    tag "cat/fastq"
+
+    test("test_cat_fastq_single_end") { // single-end: two different files (test_1, test2_1) under one meta map
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                            [ id:'test', single_end:true ], // meta map
+                            [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ]
+                        ]
+                """
+            }
+        }
+
+        then {
+            assertAll( // each closure below is one assertion
+                { assert process.success },
+                { assert snapshot(process.out.reads).match() }, // compared with the entry keyed by this test's name in main.nf.test.snap
+                { assert path(process.out.versions.get(0)).getText().contains("cat") } // versions file is only checked for the substring "cat"
+            )
+        }
+    }
+
+    test("test_cat_fastq_paired_end") { // paired-end: four files (test_1, test_2, test2_1, test2_2); snapshot expects test_1/test_2 merged outputs
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                    file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
+                    file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
+                    file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll( // each closure below is one assertion
+                { assert process.success },
+                { assert snapshot(process.out.reads).match() }, // compared with the entry keyed by this test's name in main.nf.test.snap
+                { assert path(process.out.versions.get(0)).getText().contains("cat") } // versions file is only checked for the substring "cat"
+            )
+        }
+    }
+
+    test("test_cat_fastq_single_end_same_name") { // single-end: the same file (test_1) is supplied twice
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                    file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll( // each closure below is one assertion
+                { assert process.success },
+                { assert snapshot(process.out.reads).match() }, // compared with the entry keyed by this test's name in main.nf.test.snap
+                { assert path(process.out.versions.get(0)).getText().contains("cat") } // versions file is only checked for the substring "cat"
+            )
+        }
+    }
+
+    test("test_cat_fastq_paired_end_same_name") { // paired-end: the same pair (test_1, test_2) is supplied twice
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                    file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
+                    file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                    file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll( // each closure below is one assertion
+                { assert process.success },
+                { assert snapshot(process.out.reads).match() }, // compared with the entry keyed by this test's name in main.nf.test.snap
+                { assert path(process.out.versions.get(0)).getText().contains("cat") } // versions file is only checked for the substring "cat"
+            )
+        }
+    }
+
+    test("test_cat_fastq_single_end_single_file") { // single-end: only one input file (test_1)
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll( // each closure below is one assertion
+                { assert process.success },
+                { assert snapshot(process.out.reads).match() }, // compared with the entry keyed by this test's name in main.nf.test.snap
+                { assert path(process.out.versions.get(0)).getText().contains("cat") } // versions file is only checked for the substring "cat"
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap
new file mode 100644
index 0000000..ec2342e
--- /dev/null
+++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap
@@ -0,0 +1,78 @@
+{
+    "test_cat_fastq_single_end": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d"
+                ]
+            ]
+        ],
+        "timestamp": "2023-10-17T23:19:12.990284837"
+    },
+    "test_cat_fastq_single_end_same_name": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66"
+                ]
+            ]
+        ],
+        "timestamp": "2023-10-17T23:19:31.554568147"
+    },
+    "test_cat_fastq_single_end_single_file": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af"
+                ]
+            ]
+        ],
+        "timestamp": "2023-10-17T23:19:49.629360033"
+    },
+    "test_cat_fastq_paired_end_same_name": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66",
+                        "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e"
+                    ]
+                ]
+            ]
+        ],
+        "timestamp": "2023-10-17T23:19:40.711617539"
+    },
+    "test_cat_fastq_paired_end": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d",
+                        "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda"
+                    ]
+                ]
+            ]
+        ],
+        "timestamp": "2023-10-18T07:53:20.923560211"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml
new file mode 100644
index 0000000..6ac4361
--- /dev/null
+++ b/modules/nf-core/cat/fastq/tests/tags.yml
@@ -0,0 +1,2 @@
+cat/fastq:
+  - modules/nf-core/cat/fastq/**
diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
index 9d0e6b2..f0c63f6 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml
+++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
@@ -1,3 +1,4 @@
+name: custom_dumpsoftwareversions
 channels:
   - conda-forge
   - bioconda
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
index 9414c32..5f15a5f 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml
+++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
@@ -1,4 +1,4 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: custom_dumpsoftwareversions
 description: Custom module used to dump software versions within the nf-core pipeline template
 keywords:
diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml
new file mode 100644
index 0000000..70389e6
--- /dev/null
+++ b/modules/nf-core/fastp/environment.yml
@@ -0,0 +1,7 @@
+name: fastp
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::fastp=0.23.4
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
index 9c747d3..c8e815a 100644
--- a/modules/nf-core/fastp/main.nf
+++ b/modules/nf-core/fastp/main.nf
@@ -2,7 +2,7 @@ process FASTP {
     tag "$meta.id"
     label 'process_medium'
 
-    conda "bioconda::fastp=0.23.4"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' :
         'biocontainers/fastp:0.23.4--h5f740d0_0' }"
@@ -99,22 +99,4 @@ process FASTP {
         END_VERSIONS
         """
     }
-
-    stub:
-    def prefix              = task.ext.prefix ?: "${meta.id}"
-    def isSingleOutput      = task.ext.args?.contains('--interleaved_in') || meta.single_end
-    def outputFiles         = isSingleOutput ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz"
-    def mergedFileCommand   = (!isSingleOutput && save_merged) ? "touch ${prefix}.merged.fastq.gz" : ""
-    """
-    touch $outputFiles
-    touch "${prefix}.json"
-    touch "${prefix}.html"
-    touch "${prefix}.log"
-    $mergedFileCommand
-    
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
-    END_VERSIONS
-    """
 }
diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml
index 1c34ff9..c22a16a 100644
--- a/modules/nf-core/fastp/meta.yml
+++ b/modules/nf-core/fastp/meta.yml
@@ -33,7 +33,6 @@ input:
   - save_merged:
       type: boolean
       description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz`
-
 output:
   - meta:
       type: map
@@ -71,4 +70,6 @@ output:
 authors:
   - "@drpatelh"
   - "@kevinmenden"
-  - "@gallvp"
+maintainers:
+  - "@drpatelh"
+  - "@kevinmenden"
diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test
new file mode 100644
index 0000000..f610b73
--- /dev/null
+++ b/modules/nf-core/fastp/tests/main.nf.test
@@ -0,0 +1,485 @@
+nextflow_process {
+
+    name "Test Process FASTP"
+    script "../main.nf"
+    process "FASTP"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "fastp"
+
+    test("test_fastp_single_end") { // single-end run: empty adapter list, save_trimmed_fail and save_merged both false
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = false
+                save_merged       = false
+
+                input[0] = [
+                    [ id:'test', single_end:true ],
+                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+                ]
+
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            def html_text  = [ "Q20 bases:</td><td class='col2'>12.922000 K (92.984097%)", // substrings expected in the HTML report
+                                "single end (151 cycles)" ]
+            def log_text   = [ "Q20 bases: 12922(92.9841%)", // substrings expected in the log output
+								"reads passed filter: 99" ]
+            def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", // lines expected verbatim in the gzipped reads output
+								"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+								"AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE<EEAAAEEEEEEEEEAAAAEAEEEAEEEEEE<AAAA",
+								"@ERR5069949.576388 NS500628:121:HK3MMAFX2:4:11501:11167:14939/1",
+								"ACTGTTTTCTTTGTAGAAAACATCCGTAATAGGACCTTTGTATTCTGAGGACTTTGTAAGTAAAGCACCGTCTATGC",
+								"AAA6AEEEEEEEEEAEEE/6EEAEEEAEEEEEAEEEEEEEEEEEEEEEEEEEEE<AAEEEEEEEEEEE</EEEA/AE"]
+            assertAll(
+                { assert process.success },
+                { read_lines.each { read_line ->
+                    { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } // NOTE(review): bare { ... } statement — relies on Groovy 3+ nested-code-block parsing so the assert actually runs; confirm for the nf-test runtime
+                    }
+                },
+                { html_text.each { html_part ->
+                    { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+                    }
+                },
+                { assert snapshot(process.out.json).match("test_fastp_single_end_json") }, // JSON report is compared against an explicitly named snapshot entry
+                { log_text.each { log_part ->
+                    { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+                    }
+                },
+                { assert snapshot(process.out.versions).match("versions") } // the snapshot name "versions" is reused by the other tests in this file
+            )
+        }
+    }
+
+    test("test_fastp_paired_end") { // paired-end run: empty adapter list, save_trimmed_fail and save_merged both false
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = false
+                save_merged       = false
+
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+                ]
+
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            def html_text   = [ "Q20 bases:</td><td class='col2'>25.719000 K (93.033098%)", // substrings expected in the HTML report
+									"The input has little adapter percentage (~0.000000%), probably it's trimmed before."]
+            def log_text    = [ "No adapter detected for read1", // substrings expected in the log output
+									"Q30 bases: 12281(88.3716%)"]
+            def json_text   = ['"passed_filter_reads": 198'] // JSON report is checked by substring here, not by snapshot
+            def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", // lines expected in the first reads file
+									"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+									"AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE<EEAAAEEEEEEEEEAAAAEAEEEAEEEEEE<AAAA",
+									"@ERR5069949.576388 NS500628:121:HK3MMAFX2:4:11501:11167:14939/1",
+									"ACTGTTTTCTTTGTAGAAAACATCCGTAATAGGACCTTTGTATTCTGAGGACTTTGTAAGTAAAGCACCGTCTATGC",
+									"AAA6AEEEEEEEEEAEEE/6EEAEEEAEEEEEAEEEEEEEEEEEEEEEEEEEEE<AAEEEEEEEEEEE</EEEA/AE"]
+            def read2_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/2", // lines expected in the second reads file
+									"ATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTG",
+									"AAAAAEEEEEEEEEE/EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEE/EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEEEEAEEEEEAAEEEEEEEEEAAEAAA<<EAAEEEEEEEAAA<<<AE",
+									"@ERR5069949.576388 NS500628:121:HK3MMAFX2:4:11501:11167:14939/2",
+									"GCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGT",
+									"AAAAA6EEAEEEEEAEEAEEAEEEEEEA6EEEEAEEAEEEEE6EEEEEEAEEEEA///A<<EEEEEEEEEAEEEEEE"]
+            assertAll(
+                { assert process.success },
+                { read1_lines.each { read1_line ->
+                    { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } // NOTE(review): bare { ... } statement — relies on Groovy 3+ nested-code-block parsing so the assert actually runs; confirm for the nf-test runtime
+                    }
+                },
+                { read2_lines.each { read2_line ->
+                    { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+                    }
+                },
+                { html_text.each { html_part ->
+                    { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+                    }
+                },
+                { json_text.each { json_part ->
+                    { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+                    }
+                },
+                { log_text.each { log_part ->
+                    { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+                    }
+                },
+                { assert snapshot(process.out.versions).match("versions") } // the snapshot name "versions" is reused by the other tests in this file
+            )
+        }
+    }
+
+    test("fastp test_fastp_interleaved") { // one interleaved FASTQ passed under a meta map with single_end:true
+        config './nextflow.config' // NOTE(review): presumably supplies the interleaved-input arguments for FASTP — confirm in tests/nextflow.config
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = false
+                save_merged       = false
+
+                input[0] = [ [ id:'test', single_end:true ], // meta map
+                            [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ]
+                        ]
+
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            def html_text  = [ "Q20 bases:</td><td class='col2'>25.719000 K (93.033098%)", // the report is expected to describe paired-end data even though meta says single_end:true
+									"paired end (151 cycles + 151 cycles)"]
+            def log_text   = [ "Q20 bases: 12922(92.9841%)", // substrings expected in the log output
+									"reads passed filter: 198"]
+            def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", // both the /1 and /2 records are expected in the single reads output
+									"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+									"AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE<EEAAAEEEEEEEEEAAAAEAEEEAEEEEEE<AAAA",
+									"@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/2",
+									"ATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTG",
+									"AAAAAEEEEEEEEEE/EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEE/EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEEEEAEEEEEAAEEEEEEEEEAAEAAA<<EAAEEEEEEEAAA<<<AE"]
+            assertAll(
+                { assert process.success },
+                { read_lines.each { read_line ->
+                    { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } // NOTE(review): bare { ... } statement — relies on Groovy 3+ nested-code-block parsing so the assert actually runs; confirm for the nf-test runtime
+                    }
+                },
+                { html_text.each { html_part ->
+                    { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+                    }
+                },
+                { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") }, // JSON report is compared against an explicitly named snapshot entry
+                { log_text.each { log_part ->
+                    { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+                    }
+                },
+                { assert snapshot(process.out.versions).match("versions") } // the snapshot name "versions" is reused by the other tests in this file
+            )
+        }
+    }
+
+    test("test_fastp_single_end_trim_fail") { // single-end run with save_trimmed_fail = true, so the reads_fail output is also checked
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = true
+                save_merged       = false
+
+                input[0] = [ [ id:'test', single_end:true ], // meta map
+                            [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+                        ]
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            def html_text         = [ "Q20 bases:</td><td class='col2'>12.922000 K (92.984097%)", // substrings expected in the HTML report
+									"single end (151 cycles)"]
+            def log_text          = [ "Q20 bases: 12922(92.9841%)", // substrings expected in the log output
+									"reads passed filter: 99" ]
+            def read_lines        = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", // lines expected in the passing reads output
+									"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+									"AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE<EEAAAEEEEEEEEEAAAAEAEEEAEEEEEE<AAAA",
+									"@ERR5069949.576388 NS500628:121:HK3MMAFX2:4:11501:11167:14939/1",
+									"ACTGTTTTCTTTGTAGAAAACATCCGTAATAGGACCTTTGTATTCTGAGGACTTTGTAAGTAAAGCACCGTCTATGC",
+									"AAA6AEEEEEEEEEAEEE/6EEAEEEAEEEEEAEEEEEEEEEEEEEEEEEEEEE<AAEEEEEEEEEEE</EEEA/AE"]
+            def failed_read_lines = [ "@ERR5069949.885966 NS500628:121:HK3MMAFX2:4:11610:19682:20132/1 failed_quality_filter", // lines expected in the reads_fail output
+									"GTCTAATCATAATTTCTTGGTACAGGCTGGTATTGTTCATCTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTT",
+									"AAA//E/EAA/E//E//E//E/E//AE/A/E//EAEA///AE//E///E/EEE6EEEAEEA///E/AEE/EAEE/E//E"]
+            assertAll(
+                { assert process.success },
+                { read_lines.each { read_line ->
+                    { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } // NOTE(review): bare { ... } statement — relies on Groovy 3+ nested-code-block parsing so the assert actually runs; confirm for the nf-test runtime
+                    }
+                },
+                { failed_read_lines.each { failed_read_line ->
+                    { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) }
+                    }
+                },
+                { html_text.each { html_part ->
+                    { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+                    }
+                },
+                { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") }, // JSON report is compared against an explicitly named snapshot entry
+                { log_text.each { log_part ->
+                    { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+                    }
+                },
+                { assert snapshot(process.out.versions).match("versions") } // the snapshot name "versions" is reused by the other tests in this file
+            )
+        }
+    }
+
+    test("test_fastp_paired_end_trim_fail") { // paired-end run with save_trimmed_fail = true, so both reads_fail files are checked
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = true
+                save_merged       = false
+
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+                    ]
+                ]
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            def html_text           = [ "Q20 bases:</td><td class='col2'>25.719000 K (93.033098%)", // substrings expected in the HTML report
+									"The input has little adapter percentage (~0.000000%), probably it's trimmed before."]
+            def log_text            = [ "No adapter detected for read1", // substrings expected in the log output
+									"Q30 bases: 12281(88.3716%)"]
+            def json_text           = ['"passed_filter_reads": 198'] // JSON report is checked by substring here, not by snapshot
+            def read1_lines         = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", // lines expected in the first passing reads file
+									"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+									"AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE<EEAAAEEEEEEEEEAAAAEAEEEAEEEEEE<AAAA",
+									"@ERR5069949.576388 NS500628:121:HK3MMAFX2:4:11501:11167:14939/1",
+									"ACTGTTTTCTTTGTAGAAAACATCCGTAATAGGACCTTTGTATTCTGAGGACTTTGTAAGTAAAGCACCGTCTATGC",
+									"AAA6AEEEEEEEEEAEEE/6EEAEEEAEEEEEAEEEEEEEEEEEEEEEEEEEEE<AAEEEEEEEEEEE</EEEA/AE"]
+            def read2_lines         = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/2", // lines expected in the second passing reads file
+									"ATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTG",
+									"AAAAAEEEEEEEEEE/EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEE/EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEEEEAEEEEEAAEEEEEEEEEAAEAAA<<EAAEEEEEEEAAA<<<AE",
+									"@ERR5069949.576388 NS500628:121:HK3MMAFX2:4:11501:11167:14939/2",
+									"GCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGT",
+									"AAAAA6EEAEEEEEAEEAEEAEEEEEEA6EEEEAEEAEEEEE6EEEEEEAEEEEA///A<<EEEEEEEEEAEEEEEE"]
+            def failed_read2_lines  = ["@ERR5069949.885966 NS500628:121:HK3MMAFX2:4:11610:19682:20132/2", // lines expected in the second reads_fail file
+									"CTTAGGTCTTAGGATTGGCTGTATCAACCTTAAGCTTAAGTACACAATTTTGCATAGAATGTCCAATAA",
+									"A//AA6EEAEEEEE6EEE/EEA/EA///AAE/EAEEEAE6AE/E/E/EEAAE/EAA/E/E/<EA//E/6"]
+            // No failed_read1_lines list: per the original author's note the first reads_fail file is empty, so only its existence is asserted below.
+            assertAll(
+                { assert process.success },
+                { assert path(process.out.reads_fail.get(0).get(1).get(0)).exists() }, // first reads_fail file: existence check only
+                { read1_lines.each { read1_line ->
+                    { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } // NOTE(review): bare { ... } statement — relies on Groovy 3+ nested-code-block parsing so the assert actually runs; confirm for the nf-test runtime
+                    }
+                },
+                { read2_lines.each { read2_line ->
+                    { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+                    }
+                },
+                { failed_read2_lines.each { failed_read2_line ->
+                    { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) }
+                    }
+                },
+                { html_text.each { html_part ->
+                    { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+                    }
+                },
+                { json_text.each { json_part ->
+                    { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+                    }
+                },
+                { log_text.each { log_part ->
+                    { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+                    }
+                },
+                { assert snapshot(process.out.versions).match("versions") } // the snapshot name "versions" is reused by the other tests in this file
+            )
+        }
+    }
+
+    test("test_fastp_paired_end_merged") {
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = false
+                save_merged       = true
+
+                input[0] = [ [ id:'test', single_end:false ], // meta map
+                             [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                               file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+                           ]
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            def html_text         = [ "<div id='After_filtering__merged__quality'>"]
+            def log_text          = [ "Merged and filtered:",
+									"total reads: 75",
+									"total bases: 13683"]
+            def json_text         = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683']
+            def read1_lines       = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1",
+									"CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC",
+									"AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE<AEEAEEEAEAEEEAEAEEAE/AEEEEAEEEEAEA",
+									"@ERR5069949.324865 NS500628:121:HK3MMAFX2:1:11102:17526:14721/1",
+									"CACAAACTCTAAAAGAATGTATAGGGTCAGCACCAAAAATACCAGCAGATAATAATGTTGCAAGTAGAACTTCGTGCAGATTAAAATTTTCATAAGCACTCTAAAGAAGTTGAATGTCTTCAAATTTCTTAACATTAGGGCCCACAACAAG",
+									"AAAAAEA/A<EAA/AE/EE/EE//////EA/EEE/E/EEEE//E/6//EA//<AA/A/EEEAA/EEEE/EEEA/E/</AEE////AEEEE//<E//EAE/A///<EEE//<E<<EEE<///A//E/E/EEEAA/<A////<A/AEAAA//E"]
+            def read2_lines       = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/2",
+									"GTACAAAAATAGCCTAAGAAACAATAAACTAGCATTATACACTGAAGTGTATTACCAGTTATGAAGAAAATAGGGCAATACTCAACACACATAAAAACAATACCTCTGGCCAAAAACATGACAGTTGTAACTACACCTGAGTAGTTAGAAG",
+									"AAAAAEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEAEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEA/EAA</EEEEEEEEEE/AE//A/E<AE<AA<AEEE/AAEAAAEEAEEA<A6AEEA<EEAEEEEEEEAA//EE",
+									"@ERR5069949.324865 NS500628:121:HK3MMAFX2:1:11102:17526:14721/2",
+									"ATGAATCTGATGAATACATAGCTACTAATGGACCTCTTAAAGTGCGTGGTAGTTGTGATTAAAGCGGACACATACTTGCTAAACACTCTCTTCATGATGTC",
+									"A/AAAEEEEA6AA6EE//EEA/EEEAE/EA/A////E</EEAA//EEA////EAE<///E/AEA</AAE/EA//E<EAAAE/AA//AEE//A/AE//</EE"]
+            def read_merged_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1 merged_150_37",
+									"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGTGGTGCAGGTAATTGAGCAGGGTCGCCAATGTACACAT",
+									"AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE<EEAAAEEEEEEEEEAAAAEAEEEAEEEEEE<AAAAEEEEEEEEEEEEEEEEEEEEE/EEEEEEEEEEAAAAA",
+									"@ERR5069949.576388 NS500628:121:HK3MMAFX2:4:11501:11167:14939/1 merged_77_0",
+									"ACTGTTTTCTTTGTAGAAAACATCCGTAATAGGACCTTTGTATTCTGAGGACTTTGTAAGTAAAGCACCGTCTATGC",
+									"AAA6AEEEEEEEEEAEEE/6EEAEEEAEEEEEAEEEEEEEEEEEEEEEEEEEEE<AAEEEEEEEEEEE</EEEA/AE"]
+            assertAll(
+                { assert process.success },
+                // First `reads` file must contain each expected read 1 line. Asserts are direct statements of the each-closure (no wrapping braces, which can parse as an un-invoked closure literal).
+                { read1_lines.each { read1_line ->
+                    assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line)
+                } },
+                // Second `reads` file must contain each expected read 2 line.
+                { read2_lines.each { read2_line ->
+                    assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line)
+                } },
+                // `reads_merged` output must contain each expected merged-read line.
+                { read_merged_lines.each { read_merged_line ->
+                    assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line)
+                } },
+                // HTML report text must contain each expected fragment.
+                { html_text.each { html_part ->
+                    assert path(process.out.html.get(0).get(1)).getText().contains(html_part)
+                } },
+                // JSON report text must contain each expected fragment.
+                { json_text.each { json_part ->
+                    assert path(process.out.json.get(0).get(1)).getText().contains(json_part)
+                } },
+                // Log text must contain each expected fragment.
+                { log_text.each { log_part ->
+                    assert path(process.out.log.get(0).get(1)).getText().contains(log_part)
+                } },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("test_fastp_paired_end_merged_adapterlist") {
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true)
+                save_trimmed_fail = false
+                save_merged       = true
+
+                input[0] = [ [ id:'test', single_end:false ], // meta map
+                             [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                               file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+                           ]
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            def html_text         = [ "<div id='After_filtering__merged__quality'>"]
+            def log_text          = [ "Merged and filtered:",
+									"total reads: 75",
+									"total bases: 13683"]
+            def json_text         = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"]
+            def read1_lines       = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1",
+									"CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC",
+									"AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE<AEEAEEEAEAEEEAEAEEAE/AEEEEAEEEEAEA",
+									"@ERR5069949.324865 NS500628:121:HK3MMAFX2:1:11102:17526:14721/1",
+									"CACAAACTCTAAAAGAATGTATAGGGTCAGCACCAAAAATACCAGCAGATAATAATGTTGCAAGTAGAACTTCGTGCAGATTAAAATTTTCATAAGCACTCTAAAGAAGTTGAATGTCTTCAAATTTCTTAACATTAGGGCCCACAACAAG",
+									"AAAAAEA/A<EAA/AE/EE/EE//////EA/EEE/E/EEEE//E/6//EA//<AA/A/EEEAA/EEEE/EEEA/E/</AEE////AEEEE//<E//EAE/A///<EEE//<E<<EEE<///A//E/E/EEEAA/<A////<A/AEAAA//E"]
+            def read2_lines       = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/2",
+									"GTACAAAAATAGCCTAAGAAACAATAAACTAGCATTATACACTGAAGTGTATTACCAGTTATGAAGAAAATAGGGCAATACTCAACACACATAAAAACAATACCTCTGGCCAAAAACATGACAGTTGTAACTACACCTGAGTAGTTAGAAG",
+									"AAAAAEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEAEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEA/EAA</EEEEEEEEEE/AE//A/E<AE<AA<AEEE/AAEAAAEEAEEA<A6AEEA<EEAEEEEEEEAA//EE",
+									"@ERR5069949.324865 NS500628:121:HK3MMAFX2:1:11102:17526:14721/2",
+									"ATGAATCTGATGAATACATAGCTACTAATGGACCTCTTAAAGTGCGTGGTAGTTGTGATTAAAGCGGACACATACTTGCTAAACACTCTCTTCATGATGTC",
+									"A/AAAEEEEA6AA6EE//EEA/EEEAE/EA/A////E</EEAA//EEA////EAE<///E/AEA</AAE/EA//E<EAAAE/AA//AEE//A/AE//</EE"]
+            def read_merged_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1 merged_150_37",
+									"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGTGGTGCAGGTAATTGAGCAGGGTCGCCAATGTACACAT",
+									"AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE<EEAAAEEEEEEEEEAAAAEAEEEAEEEEEE<AAAAEEEEEEEEEEEEEEEEEEEEE/EEEEEEEEEEAAAAA",
+									"@ERR5069949.576388 NS500628:121:HK3MMAFX2:4:11501:11167:14939/1 merged_77_0",
+									"ACTGTTTTCTTTGTAGAAAACATCCGTAATAGGACCTTTGTATTCTGAGGACTTTGTAAGTAAAGCACCGTCTATGC",
+									"AAA6AEEEEEEEEEAEEE/6EEAEEEAEEEEEAEEEEEEEEEEEEEEEEEEEEE<AAEEEEEEEEEEE</EEEA/AE"]
+
+            assertAll(
+                { assert process.success },
+                // First `reads` file must contain each expected read 1 line. Asserts are direct statements of the each-closure (no wrapping braces, which can parse as an un-invoked closure literal).
+                { read1_lines.each { read1_line ->
+                    assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line)
+                } },
+                // Second `reads` file must contain each expected read 2 line.
+                { read2_lines.each { read2_line ->
+                    assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line)
+                } },
+                // `reads_merged` output must contain each expected merged-read line.
+                { read_merged_lines.each { read_merged_line ->
+                    assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line)
+                } },
+                // HTML report text must contain each expected fragment.
+                { html_text.each { html_part ->
+                    assert path(process.out.html.get(0).get(1)).getText().contains(html_part)
+                } },
+                // JSON report text must contain each expected fragment (including the --adapter_fasta option).
+                { json_text.each { json_part ->
+                    assert path(process.out.json.get(0).get(1)).getText().contains(json_part)
+                } },
+                // Log text must contain each expected fragment.
+                { log_text.each { log_part ->
+                    assert path(process.out.log.get(0).get(1)).getText().contains(log_part)
+                } },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap
new file mode 100644
index 0000000..0fa68c7
--- /dev/null
+++ b/modules/nf-core/fastp/tests/main.nf.test.snap
@@ -0,0 +1,52 @@
+{
+    "fastp test_fastp_interleaved_json": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4"
+                ]
+            ]
+        ],
+        "timestamp": "2023-10-17T11:04:45.794175881"
+    },
+    "test_fastp_single_end_json": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc"
+                ]
+            ]
+        ],
+        "timestamp": "2023-10-17T11:04:10.566343705"
+    },
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+            ]
+        ],
+        "timestamp": "2023-10-17T11:04:10.582076024"
+    },
+    "test_fastp_single_end_trim_fail_json": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5"
+                ]
+            ]
+        ],
+        "timestamp": "2023-10-17T11:05:00.379878948"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.config
new file mode 100644
index 0000000..0f7849a
--- /dev/null
+++ b/modules/nf-core/fastp/tests/nextflow.config
@@ -0,0 +1,6 @@
+process {
+    // Sets ext.args to "--interleaved_in" for the process named FASTP.
+    withName: FASTP {
+        ext.args = "--interleaved_in"
+    }
+}
diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml
new file mode 100644
index 0000000..c1afcce
--- /dev/null
+++ b/modules/nf-core/fastp/tests/tags.yml
@@ -0,0 +1,2 @@
+fastp:
+  - modules/nf-core/fastp/**
diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml
new file mode 100644
index 0000000..1787b38
--- /dev/null
+++ b/modules/nf-core/fastqc/environment.yml
@@ -0,0 +1,7 @@
+name: fastqc
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::fastqc=0.12.1
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
index 249f906..50e59f2 100644
--- a/modules/nf-core/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -2,10 +2,10 @@ process FASTQC {
     tag "$meta.id"
     label 'process_medium'
 
-    conda "bioconda::fastqc=0.11.9"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' :
-        'biocontainers/fastqc:0.11.9--0' }"
+        'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
+        'biocontainers/fastqc:0.12.1--hdfd78af_0' }"
 
     input:
     tuple val(meta), path(reads)
diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml
index 4da5bb5..ee5507e 100644
--- a/modules/nf-core/fastqc/meta.yml
+++ b/modules/nf-core/fastqc/meta.yml
@@ -50,3 +50,8 @@ authors:
   - "@grst"
   - "@ewels"
   - "@FelixKrueger"
+maintainers:
+  - "@drpatelh"
+  - "@grst"
+  - "@ewels"
+  - "@FelixKrueger"
diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test
index 3961de6..6437a14 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test
+++ b/modules/nf-core/fastqc/tests/main.nf.test
@@ -1,13 +1,18 @@
 nextflow_process {
 
     name "Test Process FASTQC"
-    script "modules/nf-core/fastqc/main.nf"
+    script "../main.nf"
     process "FASTQC"
+    tag "modules"
+    tag "modules_nfcore"
     tag "fastqc"
 
     test("Single-Read") {
 
         when {
+            params {
+                outdir   = "$outputDir"
+            }
             process {
                 """
                 input[0] = [
@@ -21,12 +26,16 @@ nextflow_process {
         }
 
         then {
-            assert process.success
-            assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html"
-            assert path(process.out.html.get(0).get(1)).getText().contains("<tr><td>File type</td><td>Conventional base calls</td></tr>")
-            assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip"
+            assertAll (
+            { assert process.success },
+            // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it.
+            // looks like this: <div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
+            // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039
+            { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" },
+            { assert path(process.out.html.get(0).get(1)).getText().contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+            { assert snapshot(process.out.versions).match("versions") },
+            { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" }
+            )
         }
-
     }
-
 }
diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap
new file mode 100644
index 0000000..636a32c
--- /dev/null
+++ b/modules/nf-core/fastqc/tests/main.nf.test.snap
@@ -0,0 +1,10 @@
+{
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+            ]
+        ],
+        "timestamp": "2023-10-09T23:40:54+0000"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml
new file mode 100644
index 0000000..7834294
--- /dev/null
+++ b/modules/nf-core/fastqc/tests/tags.yml
@@ -0,0 +1,2 @@
+fastqc:
+  - modules/nf-core/fastqc/**
diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml
index d127cae..5398f71 100644
--- a/modules/nf-core/gffread/environment.yml
+++ b/modules/nf-core/gffread/environment.yml
@@ -1,3 +1,4 @@
+name: gffread
 channels:
   - conda-forge
   - bioconda
diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf
index d1477ab..68f8045 100644
--- a/modules/nf-core/gffread/main.nf
+++ b/modules/nf-core/gffread/main.nf
@@ -1,5 +1,5 @@
 process GFFREAD {
-    tag "$meta.id"
+    tag "$gff"
     label 'process_low'
 
     conda "${moduleDir}/environment.yml"
@@ -8,25 +8,23 @@ process GFFREAD {
         'biocontainers/gffread:0.12.1--h8b12597_0' }"
 
     input:
-    tuple val(meta), path(gff)
+    path gff
 
     output:
-    tuple val(meta), path("*.gtf")  , emit: gtf, optional: true
-    tuple val(meta), path("*.gff3") , emit: gff, optional: true
-    path "versions.yml"             , emit: versions
+    path "*.gtf"        , emit: gtf
+    path "versions.yml" , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args        = task.ext.args         ?: ''
-    def prefix      = task.ext.prefix       ?: "${gff.baseName}"
-    def extension   = args.contains("-T")   ?  '.gtf' : '.gff3'
+    def args   = task.ext.args   ?: ''
+    def prefix = task.ext.prefix ?: "${gff.baseName}"
     """
     gffread \\
         $gff \\
         $args \\
-        -o ${prefix}.${extension}
+        -o ${prefix}.gtf
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         gffread: \$(gffread --version 2>&1)
diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml
index 8a09a20..f486f8b 100644
--- a/modules/nf-core/gffread/meta.yml
+++ b/modules/nf-core/gffread/meta.yml
@@ -28,6 +28,5 @@ output:
       pattern: "versions.yml"
 authors:
   - "@emiller88"
-  - "@gallvp"
 maintainers:
   - "@emiller88"
diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml
new file mode 100644
index 0000000..25910b3
--- /dev/null
+++ b/modules/nf-core/gunzip/environment.yml
@@ -0,0 +1,7 @@
+name: gunzip
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::sed=4.7
diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf
index 73bf08c..468a6f2 100644
--- a/modules/nf-core/gunzip/main.nf
+++ b/modules/nf-core/gunzip/main.nf
@@ -2,7 +2,7 @@ process GUNZIP {
     tag "$archive"
     label 'process_single'
 
-    conda "conda-forge::sed=4.7"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
         'nf-core/ubuntu:20.04' }"
diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml
index 4cdcdf4..231034f 100644
--- a/modules/nf-core/gunzip/meta.yml
+++ b/modules/nf-core/gunzip/meta.yml
@@ -33,3 +33,7 @@ authors:
   - "@joseespinosa"
   - "@drpatelh"
   - "@jfy133"
+maintainers:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@jfy133"
diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test
new file mode 100644
index 0000000..d031792
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/main.nf.test
@@ -0,0 +1,35 @@
+nextflow_process {
+    // nf-test spec for the GUNZIP process defined in ../main.nf.
+    name "Test Process GUNZIP"
+    script "../main.nf"
+    process "GUNZIP"
+    tag "gunzip"
+    tag "modules_nfcore"
+    tag "modules"
+
+    test("Should run without failures") {
+        // input[0] pairs an empty meta value with one gzipped FASTQ taken from params.test_data.
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                                [],
+                                file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+                            ]
+                """
+            }
+        }
+        // Passes when the process succeeds and its whole output matches the recorded snapshot.
+        then {
+            assertAll(
+            { assert process.success },
+            { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap
new file mode 100644
index 0000000..720fd9f
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/main.nf.test.snap
@@ -0,0 +1,31 @@
+{
+    "Should run without failures": {
+        "content": [
+            {
+                "0": [
+                    [
+                        [
+                            
+                        ],
+                        "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+                ],
+                "gunzip": [
+                    [
+                        [
+                            
+                        ],
+                        "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+                ]
+            }
+        ],
+        "timestamp": "2023-10-17T15:35:37.690477896"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml
new file mode 100644
index 0000000..fd3f691
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/tags.yml
@@ -0,0 +1,2 @@
+gunzip:
+  - modules/nf-core/gunzip/**
diff --git a/modules/nf-core/samtools/cat/environment.yml b/modules/nf-core/samtools/cat/environment.yml
new file mode 100644
index 0000000..80da1cf
--- /dev/null
+++ b/modules/nf-core/samtools/cat/environment.yml
@@ -0,0 +1,7 @@
+name: samtools_cat
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::samtools=1.17
diff --git a/modules/nf-core/samtools/cat/main.nf b/modules/nf-core/samtools/cat/main.nf
index 22a63e2..5d939aa 100644
--- a/modules/nf-core/samtools/cat/main.nf
+++ b/modules/nf-core/samtools/cat/main.nf
@@ -2,7 +2,7 @@ process SAMTOOLS_CAT {
     tag "$meta.id"
     label 'process_low'
 
-    conda "bioconda::samtools=1.17"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
         'biocontainers/samtools:1.17--h00cdaf9_0' }"
diff --git a/modules/nf-core/samtools/cat/meta.yml b/modules/nf-core/samtools/cat/meta.yml
index 42632e7..3541e0c 100644
--- a/modules/nf-core/samtools/cat/meta.yml
+++ b/modules/nf-core/samtools/cat/meta.yml
@@ -47,3 +47,5 @@ output:
       pattern: "versions.yml"
 authors:
   - "@matthdsm"
+maintainers:
+  - "@matthdsm"
diff --git a/modules/nf-core/sortmerna/environment.yml b/modules/nf-core/sortmerna/environment.yml
new file mode 100644
index 0000000..3dae00a
--- /dev/null
+++ b/modules/nf-core/sortmerna/environment.yml
@@ -0,0 +1,7 @@
+name: sortmerna
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::sortmerna=4.3.4
diff --git a/modules/nf-core/sortmerna/main.nf b/modules/nf-core/sortmerna/main.nf
index 5b4fbca..53ccb97 100644
--- a/modules/nf-core/sortmerna/main.nf
+++ b/modules/nf-core/sortmerna/main.nf
@@ -2,7 +2,7 @@ process SORTMERNA {
     tag "$meta.id"
     label "process_high"
 
-    conda "bioconda::sortmerna=4.3.4"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/sortmerna:4.3.4--h9ee0642_0' :
         'biocontainers/sortmerna:4.3.4--h9ee0642_0' }"
@@ -67,30 +67,4 @@ process SORTMERNA {
         END_VERSIONS
         """
     }
-
-    stub:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    if (meta.single_end) {
-        """
-        touch ${prefix}.non_rRNA.fastq.gz
-        touch ${prefix}.sortmerna.log
-
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
-        END_VERSIONS
-        """
-    } else {
-        """
-        touch ${prefix}_1.non_rRNA.fastq.gz
-        touch ${prefix}_2.non_rRNA.fastq.gz
-        touch ${prefix}.sortmerna.log
-
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
-        END_VERSIONS
-        """
-    }
 }
diff --git a/modules/nf-core/sortmerna/meta.yml b/modules/nf-core/sortmerna/meta.yml
index 66f00de..de0b18e 100644
--- a/modules/nf-core/sortmerna/meta.yml
+++ b/modules/nf-core/sortmerna/meta.yml
@@ -48,4 +48,6 @@ output:
 authors:
   - "@drpatelh"
   - "@mashehu"
-  - "@gallvp"
+maintainers:
+  - "@drpatelh"
+  - "@mashehu"
diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml
new file mode 100644
index 0000000..6db2098
--- /dev/null
+++ b/modules/nf-core/star/align/environment.yml
@@ -0,0 +1,9 @@
+name: star_align
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::star=2.7.10a
+  - bioconda::samtools=1.16.1
+  - conda-forge::gawk=5.1.0
diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf
index d0e2038..fa645a6 100644
--- a/modules/nf-core/star/align/main.nf
+++ b/modules/nf-core/star/align/main.nf
@@ -2,7 +2,7 @@ process STAR_ALIGN {
     tag "$meta.id"
     label 'process_high'
 
-    conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' :
         'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }"
diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml
index 3d8fed0..e80dbb7 100644
--- a/modules/nf-core/star/align/meta.yml
+++ b/modules/nf-core/star/align/meta.yml
@@ -52,7 +52,6 @@ input:
   - seq_center:
       type: string
       description: Sequencing center
-
 output:
   - bam:
       type: file
@@ -106,8 +105,11 @@ output:
       type: file
       description: STAR output bedGraph format file(s) (optional)
       pattern: "*.bg"
-
 authors:
   - "@kevinmenden"
   - "@drpatelh"
   - "@praveenraj2018"
+maintainers:
+  - "@kevinmenden"
+  - "@drpatelh"
+  - "@praveenraj2018"
diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml
new file mode 100644
index 0000000..0b35ff5
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/environment.yml
@@ -0,0 +1,9 @@
+name: star_genomegenerate
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::star=2.7.10a
+  - bioconda::samtools=1.16.1
+  - conda-forge::gawk=5.1.0
diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf
index ed32d7c..473e62a 100644
--- a/modules/nf-core/star/genomegenerate/main.nf
+++ b/modules/nf-core/star/genomegenerate/main.nf
@@ -2,7 +2,7 @@ process STAR_GENOMEGENERATE {
     tag "$fasta"
     label 'process_high'
 
-    conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' :
         'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }"
@@ -10,7 +10,6 @@ process STAR_GENOMEGENERATE {
     input:
     tuple val(meta), path(fasta)
     tuple val(meta2), path(gtf)
-    val star_ignore_sjdbgtf
 
     output:
     tuple val(meta), path("star")  , emit: index
@@ -23,7 +22,6 @@ process STAR_GENOMEGENERATE {
     def args = task.ext.args ?: ''
     def args_list = args.tokenize()
     def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
-    def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf"
     if (args_list.contains('--genomeSAindexNbases')) {
         """
         mkdir star
@@ -31,7 +29,7 @@ process STAR_GENOMEGENERATE {
             --runMode genomeGenerate \\
             --genomeDir star/ \\
             --genomeFastaFiles $fasta \\
-            $ignore_gtf \\
+            --sjdbGTFfile $gtf \\
             --runThreadN $task.cpus \\
             $memory \\
             $args
@@ -53,7 +51,7 @@ process STAR_GENOMEGENERATE {
             --runMode genomeGenerate \\
             --genomeDir star/ \\
             --genomeFastaFiles $fasta \\
-            $ignore_gtf \\
+            --sjdbGTFfile $gtf \\
             --runThreadN $task.cpus \\
             --genomeSAindexNbases \$NUM_BASES \\
             $memory \\
diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml
index e824dbf..1061e1b 100644
--- a/modules/nf-core/star/genomegenerate/meta.yml
+++ b/modules/nf-core/star/genomegenerate/meta.yml
@@ -31,7 +31,6 @@ input:
   - gtf:
       type: file
       description: GTF file of the reference genome
-
 output:
   - meta:
       type: map
@@ -46,8 +45,9 @@ output:
       type: file
       description: File containing software versions
       pattern: "versions.yml"
-
 authors:
   - "@kevinmenden"
   - "@drpatelh"
-  - "@gallvp"
+maintainers:
+  - "@kevinmenden"
+  - "@drpatelh"
diff --git a/modules/nf-core/star/starsolo/main.nf b/modules/nf-core/star/starsolo/main.nf
deleted file mode 100644
index 07499b6..0000000
--- a/modules/nf-core/star/starsolo/main.nf
+++ /dev/null
@@ -1,94 +0,0 @@
-process STARSOLO {
-    tag "$meta.id"
-    label 'process_high'
-
-    conda "bioconda::star=2.7.10b"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/star:2.7.10b--h9ee0642_0':
-        'biocontainers/star:2.7.10b--h9ee0642_0' }"
-
-    input:
-    tuple val(meta), val(solotype), path(reads)
-    tuple val(meta2), path(index)
-
-    output:
-    tuple val(meta),  path('*.Solo.out')         , emit: counts
-    tuple val(meta),  path('*Log.final.out')     , emit: log_final
-    tuple val(meta),  path('*Log.out')           , emit: log_out
-    tuple val(meta),  path('*Log.progress.out')  , emit: log_progress
-    tuple val(meta),  path('*/Gene/Summary.csv') , emit: summary
-    path "versions.yml"                          , emit: versions
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def (forward, reverse) = reads.collate(2).transpose()
-    def zcat = reads[0].getExtension() == "gz" ? "--readFilesCommand zcat": ""
-
-    // Handle solotype argument logic
-    switch(solotype) {
-        case "CB_UMI_Simple":
-            solotype_args = meta.umi_len ? "--soloUMIlen ${meta.umi_len} " : "";
-            solotype_args = solotype_args + (meta.whitelist ? "--soloCBwhitelist ${meta.whitelist} " : "--soloCBwhitelist None ");
-            solotype_args = solotype_args + (meta.umi_start ? "--soloUMIstart ${meta.umi_start} " : "");
-            solotype_args = solotype_args + (meta.cb_len ? "--soloCBlen ${meta.cb_len} " : "");
-            solotype_args = solotype_args + (meta.cb_start ? "--soloCBstart ${meta.cb_start} " : "");
-            solotype_args = solotype_args + (meta.barcode_len ? "--soloBarcodeReadLength ${meta.barcode_len} " : "");
-            solotype_args = solotype_args + (meta.barcode_mate ? "--soloBarcodeMate ${meta.barcode_mate} " : "");
-            break
-        case "CB_UMI_Complex":
-            solotype_args = meta.cb_position ? "--soloCBposition ${meta.cb_position}" : "";
-            solotype_args = solotype_args + (meta.whitelist ? "--soloCBwhitelist ${meta.whitelist} " : "--soloCBwhitelist None ");
-            solotype_args = solotype_args + (meta.umi_position ? "--soloUMIposition ${meta.umi_position} " : "");
-            solotype_args = solotype_args + (meta.adapter_seq ? "--soloAdapterSequence ${meta.adapter_seq} " : "");
-            solotype_args = solotype_args + (meta.max_mismatch_adapter ? "--soloAdapterMismatchesNmax ${meta.max_mismatch_adapter} " : "");
-            break
-        case "SmartSeq":
-            solotype_args = "--soloUMIdedup Exact ";
-            solotype_args = solotype_args + (meta.strandedness ? "--soloStrand ${meta.strandedness} " : "");
-            solotype_args = solotype_args + "--outSAMattrRGline ID:${prefix} ";
-            break
-        default:
-            log.warn("Unknown output solotype (${solotype})");
-            break
-    }
-
-    """
-    STAR \\
-        --genomeDir $index \\
-        --readFilesIn ${reverse.join( "," )} ${forward.join( "," )} \\
-        --runThreadN $task.cpus \\
-        --outFileNamePrefix $prefix. \\
-        --soloType $solotype \\
-        $zcat \\
-        $solotype_args \\
-        $args
-
-    if [ -d ${prefix}.Solo.out ]; then
-        find ${prefix}.Solo.out \\( -name "*.tsv" -o -name "*.mtx" \\) -exec gzip {} \\;
-    fi
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        star: \$(STAR --version | sed -e "s/STAR_//g")
-    END_VERSIONS
-    """
-
-    stub:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    mkdir ${prefix}.Solo.out/
-    touch ${prefix}.Solo.out/Log.final.out
-    touch ${prefix}.Solo.out/Log.out
-    touch ${prefix}.Solo.out/Log.progress.out
-    touch ${prefix}.Solo.out/Summary.csv
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        star: \$(STAR --version | sed -e "s/STAR_//g")
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/star/starsolo/meta.yml b/modules/nf-core/star/starsolo/meta.yml
deleted file mode 100644
index 4fce56c..0000000
--- a/modules/nf-core/star/starsolo/meta.yml
+++ /dev/null
@@ -1,79 +0,0 @@
-name: "starsolo"
-description: Create a counts matrix for single-cell data using STARSolo, handling cell barcodes and UMI information.
-keywords:
-  - align
-  - count
-  - genome
-  - reference
-tools:
-  - "starsolo":
-      description: "Mapping, demultiplexing and quantification for single cell RNA-seq."
-      homepage: "https://github.com/alexdobin/STAR/"
-      documentation: "https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md"
-      doi: "10.1101/2021.05.05.442755"
-      licence: ["MIT"]
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information.
-        Here, you should add all the specific barcode/umi
-        information for each sample.
-        e.g. `[ id:'test_starsolo', umi_len:'12', cb_start:1 ]`
-  - solotype:
-      type: string
-      description: |
-        Type of single-cell library.
-        It can be CB_UMI_Simple for most common ones such as 10xv2 and 10xv3,
-        CB_UMI_Complex for method such as inDrop and SmartSeq for SMART-Seq.
-  - meta2:
-      type: map
-      description: Groovy Map containing the STAR index information.
-  - index:
-      type: directory
-      description: STAR genome index
-      pattern: "star"
-  - reads:
-      type: file
-      description: |
-        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
-        respectively.
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information.
-        Here, you should add all the specific barcode/umi
-        information for each sample.
-        e.g. `[ id:'test_starsolo', umi_len:'12', cb_start:1 ]`
-  - log_final:
-      type: file
-      description: STAR final log file
-      pattern: "*Log.final.out"
-  - log_out:
-      type: file
-      description: STAR lot out file
-      pattern: "*Log.out"
-  - log_progress:
-      type: file
-      description: STAR log progress file
-      pattern: "*Log.progress.out"
-  - summary:
-      type: file
-      description: STARSolo metrics summary CSV file.
-      pattern: "*/Gene/Summary.csv"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
-authors:
-  - "@kevinmenden"
-  - "@ggabernet"
-  - "@grst"
-  - "@fmalmeida"
-  - "@rhreynolds"
-  - "@apeltzer"
-  - "@vivian-chen16"
-  - "@maxulysse"
-  - "@joaodemeirelles"
diff --git a/modules/nf-core/trinity/main.nf b/modules/nf-core/trinity/main.nf
deleted file mode 100644
index 3960a35..0000000
--- a/modules/nf-core/trinity/main.nf
+++ /dev/null
@@ -1,74 +0,0 @@
-process TRINITY {
-    tag "$meta.id"
-    label 'process_high_memory'
-
-    conda "bioconda::trinity=2.13.2"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/trinity:2.13.2--h00214ad_1':
-        'biocontainers/trinity:2.13.2--h00214ad_1' }"
-
-    input:
-    tuple val(meta), path(reads)
-
-    output:
-    tuple val(meta), path("*.fa.gz")       , emit: transcript_fasta
-    path "versions.yml"                    , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-
-    if (meta.single_end) {
-        reads_args = "--single ${reads}"
-    } else {
-        reads_args = "--left ${reads[0]} --right ${reads[1]}"
-    }
-
-    // --seqType argument, fasta or fastq. Exact pattern match .fasta or .fa suffix with optional .gz (gzip) suffix
-    seqType_args = reads[0] ==~ /(.*fasta(.gz)?$)|(.*fa(.gz)?$)/ ? "fa" : "fq"
-
-    // Define the memory requirements. Trinity needs this as an option.
-    def avail_mem = 7
-    if (!task.memory) {
-        log.info '[Trinity] Available memory not known - defaulting to 7GB. Specify process memory requirements to change this.'
-    } else {
-        avail_mem = (task.memory.giga*0.8).intValue()
-    }
-
-    """
-    # Note that Trinity needs the word 'trinity' in the outdir
-
-    Trinity \\
-    --seqType ${seqType_args} \\
-    --max_memory ${avail_mem}G \\
-    ${reads_args} \\
-    --output ${prefix}_trinity \\
-    --CPU $task.cpus \\
-    $args
-
-    gzip -cf ${prefix}_trinity.Trinity.fasta > ${prefix}.fa.gz
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        trinity: \$(echo \$(Trinity --version | head -n 1 2>&1) | sed 's/^Trinity version: Trinity-v//' ))
-    END_VERSIONS
-
-    # Need to only take the first line of --version since it will warn about not being up-to-date and this messes up the version.yaml.
-    """
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    touch ${prefix}.fa.gz
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        trinity: \$(echo \$(Trinity --version | head -n 1 2>&1) | sed 's/^Trinity version: Trinity-v//' ))
-    END_VERSIONS
-
-    # Need to only take the first line of --version since it will warn about not being up-to-date and this messes up the version.yaml.
-    """
-}
diff --git a/modules/nf-core/trinity/meta.yml b/modules/nf-core/trinity/meta.yml
deleted file mode 100644
index 26e8c10..0000000
--- a/modules/nf-core/trinity/meta.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-name: "trinity"
-description: Assembles a de novo transcriptome from RNAseq reads
-keywords:
-  - assembly
-  - de novo assembler
-  - fasta
-  - fastq
-tools:
-  - "trinity":
-      description: "Trinity assembles transcript sequences from Illumina RNA-Seq data."
-      homepage: "https://github.com/trinityrnaseq/trinityrnaseq/wiki"
-      documentation: "https://github.com/trinityrnaseq/trinityrnaseq/wiki"
-      tool_dev_url: "https://github.com/trinityrnaseq/trinityrnaseq/"
-      doi: "10.1038/nbt.1883"
-      licence: "['BSD-3-clause']"
-
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - reads:
-      type: file
-      description: fasta/fastq file of reads to be assembled into a transcriptome
-      pattern: "*.{fa|fasta|fq|fastq}"
-
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-  - transcript_fasta:
-      type: file
-      description: de novo assembled transcripts fasta file compressed
-      pattern: "*.fa.gz"
-
-authors:
-  - "@timslittle"
-  - "@gallvp"
diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf
deleted file mode 100644
index 56ea046..0000000
--- a/modules/nf-core/umitools/dedup/main.nf
+++ /dev/null
@@ -1,62 +0,0 @@
-process UMITOOLS_DEDUP {
-    tag "$meta.id"
-    label "process_medium"
-
-    conda "bioconda::umi_tools=1.1.4"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
-        'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"
-
-    input:
-    tuple val(meta), path(bam), path(bai)
-    val get_output_stats
-
-    output:
-    tuple val(meta), path("${prefix}.bam")     , emit: bam
-    tuple val(meta), path("*.log")             , emit: log
-    tuple val(meta), path("*edit_distance.tsv"), optional:true, emit: tsv_edit_distance
-    tuple val(meta), path("*per_umi.tsv")      , optional:true, emit: tsv_per_umi
-    tuple val(meta), path("*per_position.tsv") , optional:true, emit: tsv_umi_per_position
-    path  "versions.yml"                       , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    prefix = task.ext.prefix ?: "${meta.id}"
-    def paired = meta.single_end ? "" : "--paired"
-    stats = get_output_stats ? "--output-stats ${prefix}" : ""
-    if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
-
-    if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"}
-    """
-    PYTHONHASHSEED=0 umi_tools \\
-        dedup \\
-        -I $bam \\
-        -S ${prefix}.bam \\
-        -L ${prefix}.log \\
-        $stats \\
-        $paired \\
-        $args
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
-    END_VERSIONS
-    """
-
-    stub:
-    """
-    touch ${prefix}.bam
-    touch ${prefix}.log
-    touch ${prefix}_edit_distance.tsv
-    touch ${prefix}_per_umi.tsv
-    touch ${prefix}_per_position.tsv
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml
deleted file mode 100644
index 534d4c6..0000000
--- a/modules/nf-core/umitools/dedup/meta.yml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: umitools_dedup
-description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
-keywords:
-  - umitools
-  - deduplication
-  - dedup
-tools:
-  - umi_tools:
-      description: >
-        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
-        and single cell RNA-Seq cell barcodes
-      documentation: https://umi-tools.readthedocs.io/en/latest/
-      license: ["MIT"]
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - bam:
-      type: file
-      description: |
-        BAM file containing reads to be deduplicated via UMIs.
-      pattern: "*.{bam}"
-  - bai:
-      type: file
-      description: |
-        BAM index files corresponding to the input BAM file.
-      pattern: "*.{bai}"
-  - get_output_stats:
-      type: boolean
-      description: |
-        Whether or not to generate output stats.
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - bam:
-      type: file
-      description: BAM file with deduplicated UMIs.
-      pattern: "*.{bam}"
-  - log:
-      type: file
-      description: File with logging information
-      pattern: "*.{log}"
-  - tsv_edit_distance:
-      type: file
-      description: Reports the (binned) average edit distance between the UMIs at each position.
-      pattern: "*edit_distance.tsv"
-  - tsv_per_umi:
-      type: file
-      description: UMI-level summary statistics.
-      pattern: "*per_umi.tsv"
-  - tsv_umi_per_position:
-      type: file
-      description: Tabulates the counts for unique combinations of UMI and position.
-      pattern: "*per_position.tsv"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
-authors:
-  - "@drpatelh"
-  - "@grst"
-  - "@klkeys"
diff --git a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml
new file mode 100644
index 0000000..7d08ac0
--- /dev/null
+++ b/modules/nf-core/umitools/extract/environment.yml
@@ -0,0 +1,7 @@
+name: umitools_extract
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::umi_tools=1.1.4
diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf
index 2f94fa9..a01ef73 100644
--- a/modules/nf-core/umitools/extract/main.nf
+++ b/modules/nf-core/umitools/extract/main.nf
@@ -3,7 +3,7 @@ process UMITOOLS_EXTRACT {
     label "process_single"
     label "process_long"
 
-    conda "bioconda::umi_tools=1.1.4"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
         'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"
diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml
index db64a0f..7695b27 100644
--- a/modules/nf-core/umitools/extract/meta.yml
+++ b/modules/nf-core/umitools/extract/meta.yml
@@ -1,15 +1,16 @@
 name: umitools_extract
 description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place
 keywords:
-  - umitools
+  - UMI
+  - barcode
   - extract
+  - umitools
 tools:
   - umi_tools:
       description: >
-        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
-        and single cell RNA-Seq cell barcodes
+        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes
       documentation: https://umi-tools.readthedocs.io/en/latest/
-      license: ["MIT"]
+      license: "MIT"
 input:
   - meta:
       type: map
@@ -29,9 +30,7 @@ output:
   - reads:
       type: file
       description: >
-        Extracted FASTQ files. |
-        For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. |
-        For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
+        Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
       pattern: "*.{fastq.gz}"
   - log:
       type: file
@@ -41,7 +40,9 @@ output:
       type: file
       description: File containing software versions
       pattern: "versions.yml"
-
 authors:
   - "@drpatelh"
   - "@grst"
+maintainers:
+  - "@drpatelh"
+  - "@grst"
diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test
new file mode 100644
index 0000000..22242d1
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/main.nf.test
@@ -0,0 +1,35 @@
+nextflow_process {
+
+    name "Test Process UMITOOLS_EXTRACT"
+    script "../main.nf"
+    process "UMITOOLS_EXTRACT"
+    config "./nextflow.config"
+    tag "modules_nfcore"
+    tag "modules"
+    tag "umitools"
+    tag "umitools/extract"
+
+    test("Should run without failures") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [ [ id:'test', single_end:true ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+            ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+            { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap
new file mode 100644
index 0000000..54e77fb
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap
@@ -0,0 +1,10 @@
+{
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,866a2da05ce1af35cc07261ffe6bc31a"
+            ]
+        ],
+        "timestamp": "2023-10-17T08:25:55.427194"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config
new file mode 100644
index 0000000..c866f5a
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/nextflow.config
@@ -0,0 +1,9 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    
+    withName: UMITOOLS_EXTRACT {
+        ext.args = '--bc-pattern="NNNN"'
+    }
+
+}
diff --git a/modules/nf-core/umitools/extract/tests/tags.yml b/modules/nf-core/umitools/extract/tests/tags.yml
new file mode 100644
index 0000000..c3fb23d
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/tags.yml
@@ -0,0 +1,2 @@
+umitools/extract:
+  - modules/nf-core/umitools/extract/**
diff --git a/modules/nf-core/umitools/group/main.nf b/modules/nf-core/umitools/group/main.nf
deleted file mode 100644
index 9a6370b..0000000
--- a/modules/nf-core/umitools/group/main.nf
+++ /dev/null
@@ -1,62 +0,0 @@
-process UMITOOLS_GROUP {
-    tag "$meta.id"
-    label 'process_medium'
-
-    conda "bioconda::umi_tools=1.1.4"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
-        'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"
-
-    input:
-    tuple val(meta), path(bam), path(bai)
-    val create_bam
-    val get_group_info
-
-    output:
-    tuple val(meta), path("*.log")        , emit: log
-    tuple val(meta), path("${prefix}.bam"), optional: true, emit: bam
-    tuple val(meta), path("*.tsv")        , optional: true, emit: tsv
-    path "versions.yml"                   , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args    = task.ext.args   ?: ''
-    prefix      = task.ext.prefix ?: "${meta.id}"
-    def paired  = meta.single_end ? "" : "--paired"
-    output_bam  = create_bam      ? "--output-bam -S ${prefix}.bam" : ""
-    group_info  = get_group_info  ? "--group-out ${prefix}.tsv"     : ""
-
-    if (create_bam && "$bam" == "${prefix}.bam") { error "Input and output names are the same, set prefix in module configuration to disambiguate!" }
-
-    if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"}
-    """
-    PYTHONHASHSEED=0 umi_tools \\
-        group \\
-        -I $bam \\
-        $output_bam \\
-        -L ${prefix}.log \\
-        $group_info \\
-        $paired \\
-        $args
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
-    END_VERSIONS
-    """
-
-    stub:
-    prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    touch ${prefix}.bam
-    touch ${prefix}.log
-    touch ${prefix}.tsv
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/umitools/group/meta.yml b/modules/nf-core/umitools/group/meta.yml
deleted file mode 100644
index 1fa826d..0000000
--- a/modules/nf-core/umitools/group/meta.yml
+++ /dev/null
@@ -1,62 +0,0 @@
-name: umitools_group
-description: Group reads based on their UMI and mapping coordinates
-keywords:
-  - umitools
-  - umi
-  - deduplication
-  - dedup
-  - clustering
-tools:
-  - umi_tools:
-      description: >
-        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
-        and single cell RNA-Seq cell barcodes
-      documentation: https://umi-tools.readthedocs.io/en/latest/
-      license: ["MIT"]
-
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - bam:
-      type: file
-      description: |
-        BAM file containing reads to be deduplicated via UMIs.
-      pattern: "*.{bam}"
-  - bai:
-      type: file
-      description: |
-        BAM index files corresponding to the input BAM file.
-      pattern: "*.{bai}"
-  - create_bam:
-      type: boolean
-      description: |
-        Whether or not to create a read group tagged BAM file.
-  - get_group_info:
-      type: boolean
-      description: |
-        Whether or not to generate the flatfile describing the read groups, see docs for complete info of all columns
-
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - bam:
-      type: file
-      description: a read group tagged BAM file.
-      pattern: "${prefix}.{bam}"
-  - log:
-      type: file
-      description: File with logging information
-      pattern: "*.{log}"
-  - tsv:
-      type: file
-      description: Flatfile describing the read groups, see docs for complete info of all columns
-      pattern: "*.{tsv}"
-
-authors:
-  - "@Joon-Klaps"
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
index 63a6592..3dbb27e 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
@@ -96,8 +96,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP {
             .out
             .reads
             .join(trim_json)
-            // Change: Bypassing getFastpReadsAfterFiltering when FASTP stub returns empty json
-            .map { meta, reads, json -> [ meta, reads, json.text ? getFastpReadsAfterFiltering(json) : min_trimmed_reads.toLong()] }
+            .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json) ] }
             .set { ch_num_trimmed_reads }
 
         ch_num_trimmed_reads
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
index eafb0dc..220e8db 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
@@ -69,8 +69,10 @@ output:
   - reads:
       type: file
       description: >
-        Extracted FASTQ files. |
-        For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. |
+        Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. |
+
+
+
           For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
       pattern: "*.{fastq.gz}"
   - fastqc_html:
@@ -122,4 +124,5 @@ output:
       pattern: "versions.yml"
 authors:
   - "@robsyme"
-  - "@gallvp"
+maintainers:
+  - "@robsyme"

From bea59ef3a81622b1b6f3d764650f0143c81eaa20 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 22 Nov 2023 13:41:41 +1300
Subject: [PATCH 19/59] Reimported kherronism modules with nf-core/tools

---
 modules.json                             | 20 ++++++++++
 modules/kherronism/CHANGELOG.md          | 21 ----------
 modules/kherronism/LICENSE               | 21 ----------
 modules/kherronism/braker3/main.nf       | 50 ++++++------------------
 modules/kherronism/braker3/meta.yml      | 24 ++++++++++--
 modules/kherronism/repeatmasker/main.nf  | 19 +--------
 modules/kherronism/repeatmasker/meta.yml |  1 -
 7 files changed, 53 insertions(+), 103 deletions(-)
 delete mode 100644 modules/kherronism/CHANGELOG.md
 delete mode 100644 modules/kherronism/LICENSE

diff --git a/modules.json b/modules.json
index 14b25cd..6c14afd 100644
--- a/modules.json
+++ b/modules.json
@@ -2,6 +2,26 @@
     "name": "PlantandFoodResearch/pangene",
     "homePage": "https://github.com/PlantandFoodResearch/pangene",
     "repos": {
+        "git@github.com:kherronism/nf-modules.git": {
+            "modules": {
+                "kherronism": {
+                    "braker3": {
+                        "branch": "dev",
+                        "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "repeatmasker": {
+                        "branch": "dev",
+                        "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    }
+                }
+            }
+        },
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
diff --git a/modules/kherronism/CHANGELOG.md b/modules/kherronism/CHANGELOG.md
deleted file mode 100644
index 5665af2..0000000
--- a/modules/kherronism/CHANGELOG.md
+++ /dev/null
@@ -1,21 +0,0 @@
-## Source
-
-- Repo: https://github.com/kherronism/rewarewaannotation/tree/1a39a83e22fe2d8665a8c6dc49772cce6579983f
-- License: See LICENSE file
-
-## Changes
-
-### repeatmasker
-
-1. Added stub
-2. Added author in meta.yml
-3. Changed input "tuple val(meta), path(lib)" to "path(lib)"
-
-### braker3
-
-1. Added stub
-2. Added author in meta.yml
-3. Made output hintsfile optional as it is not produced for ab-initio annotation.
-4. Directed `--AUGUSTUS_CONFIG_PATH` to work folder. This avoids "species already exists" error on subsequent runs with same species.
-5. Updated version extractor.
-6. Added `containerOptions "-B $TMPDIR:$TMPDIR"`
\ No newline at end of file
diff --git a/modules/kherronism/LICENSE b/modules/kherronism/LICENSE
deleted file mode 100644
index 4b42925..0000000
--- a/modules/kherronism/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) Katie Herron
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
diff --git a/modules/kherronism/braker3/main.nf b/modules/kherronism/braker3/main.nf
index c9d915a..e5cc77c 100644
--- a/modules/kherronism/braker3/main.nf
+++ b/modules/kherronism/braker3/main.nf
@@ -4,22 +4,17 @@ process BRAKER3 {
 
     conda "bioconda::braker3=3.0.3"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'registry.hub.docker.com/teambraker/braker3:v.1.0.3':
-        'registry.hub.docker.com/teambraker/braker3:v.1.0.3' }"
+        'registry.hub.docker.com/teambraker/braker3:v.1.0.4':
+        'registry.hub.docker.com/teambraker/braker3:v.1.0.4' }"
 
     input:
-    tuple val(meta), path(fasta)
-    path bam
-    path rnaseq_sets_dirs
-    path rnaseq_sets_ids
-    path proteins
-    path hintsfile
+    tuple val(meta), path(fasta), path(rnaseq_sets_ids), path(rnaseq_sets_dirs), path(bam), path(proteins), path(hintsfile)
 
     output:
     tuple val(meta), path("${prefix}/braker.gtf")      , emit: gtf
     tuple val(meta), path("${prefix}/braker.codingseq"), emit: cds
     tuple val(meta), path("${prefix}/braker.aa")       , emit: aa
-    tuple val(meta), path("${prefix}/hintsfile.gff")   , emit: hintsfile, optional: true
+    tuple val(meta), path("${prefix}/hintsfile.gff")   , emit: hintsfile
     tuple val(meta), path("${prefix}/braker.log")      , emit: log
     tuple val(meta), path("${prefix}/what-to-cite.txt"), emit: citations
     tuple val(meta), path("${prefix}/braker.gff3")     , emit: gff3     , optional: true
@@ -32,48 +27,27 @@ process BRAKER3 {
     def args = task.ext.args ?: ''
     prefix   = task.ext.prefix ?: "${meta.id}"
 
-    def hints    = hintsfile ? "--hints=${hintsfile}" : ''
+    def rna_ids  = rnaseq_sets_ids ? "--rnaseq_sets_ids=${rnaseq_sets_ids}" : ''
+    def rna_dirs = rnaseq_sets_dirs ? "--rnaseq_sets_dirs=${rnaseq_sets_dirs}" : ''
     def bam      = bam ? "--bam=${bam}" : ''
     def proteins = proteins ? "--prot_seq=${proteins}" : ''
-    def rna_dirs = rnaseq_sets_dirs ? "--rnaseq_sets_dirs=${rnaseq_sets_dirs}" : ''
-    def rna_ids  = rnaseq_sets_ids ? "--rnaseq_sets_ids=${rnaseq_sets_ids}" : ''
+    def hints    = hintsfile ? "--hints=${hintsfile}" : ''
     """
-    cp -r /usr/share/augustus/config augustus_config
-
     braker.pl \\
         --genome ${fasta} \\
         --species ${prefix} \\
         --workingdir ${prefix} \\
-        --AUGUSTUS_CONFIG_PATH "\$(pwd)/augustus_config" \\
         --threads ${task.cpus} \\
-        ${hints} \\
+        ${rna_ids} \\
+        ${rna_dirs} \\
         ${bam} \\
         ${proteins} \\
-        ${rna_dirs} \\
-        ${rna_ids} \\
+        ${hints} \\
         ${args}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        braker3: \$(braker.pl --version 2>&1 | grep "version" | sed 's/braker.pl version//; s/\\s*//')
-    END_VERSIONS
-    """
-
-    stub:
-    prefix   = task.ext.prefix ?: "${meta.id}"
-    """
-    mkdir "$prefix"
-
-    touch "${prefix}/braker.gtf"
-    touch "${prefix}/braker.codingseq"
-    touch "${prefix}/braker.aa"
-    touch "${prefix}/hintsfile.gff"
-    touch "${prefix}/braker.log"
-    touch "${prefix}/what-to-cite.txt"
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        braker3: \$(braker.pl --version 2>&1 | grep "version" | sed 's/braker.pl version//; s/\\s*//')
+         braker3: \$(braker.pl --version 2>&1 | sed 's/^.*BRAKER3 v//; s/ .*\$//')
     END_VERSIONS
     """
-}
\ No newline at end of file
+}
diff --git a/modules/kherronism/braker3/meta.yml b/modules/kherronism/braker3/meta.yml
index b3506fd..9bc13a3 100644
--- a/modules/kherronism/braker3/meta.yml
+++ b/modules/kherronism/braker3/meta.yml
@@ -22,11 +22,28 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - assembly:
+  - fasta:
       type: file
-      description: Genome assembly
+      description: Genome assembly fasta
       pattern: "*.{fasta,fa,fas,faa,fna}"
-
+  - rnaseq_sets_ids:
+      type: file
+      description: IDs of RNA-seq data sets to be passed to --rnaseq_sets_ids
+  - rnaseq_sets_dirs:
+      type: file
+      description: Directories of RNA-seq data sets to be passed to --rnaseq_sets_dirs
+  - bam:
+      type: file
+      description: BAM file of RNA-seq data to be passed to --bam
+      pattern: "*.bam"
+  - proteins:
+      type: file
+      description: Protein evidence to be passed to --prot_seq
+      pattern: "*.{fasta,fa,fas,faa}"
+  - hintsfile:
+      type: file
+      description: Hintsfile to be passed to --hints
+      pattern: "*.{gff,gtf,gff3}"
 output:
   - gtf:
       type: file
@@ -55,4 +72,3 @@ output:
 
 authors:
   - "@kherronism"
-  - "@gallvp"
diff --git a/modules/kherronism/repeatmasker/main.nf b/modules/kherronism/repeatmasker/main.nf
index fdab29e..6abf0b9 100644
--- a/modules/kherronism/repeatmasker/main.nf
+++ b/modules/kherronism/repeatmasker/main.nf
@@ -8,8 +8,7 @@ process REPEATMASKER {
         'biocontainers/repeatmasker:4.1.5--pl5321hdfd78af_0' }"
 
     input:
-    tuple val(meta), path(fasta)
-    path(lib)
+    tuple val(meta), path(fasta), path(lib)
 
     output:
     tuple val(meta), path("${meta.id}/*.f*a.masked") , emit: fasta_masked
@@ -40,20 +39,4 @@ process REPEATMASKER {
         repeatmasker: ${VERSION}
     END_VERSIONS
     """
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def VERSION = '4.1.5'  // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
-    """
-    mkdir "$meta.id"
-
-    touch "${meta.id}/${meta.id}.fasta.masked"
-    touch "${meta.id}/${meta.id}.fasta.out"
-    touch "${meta.id}/${meta.id}.fasta.tbl"
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        repeatmasker: ${VERSION}
-    END_VERSIONS
-    """
 }
diff --git a/modules/kherronism/repeatmasker/meta.yml b/modules/kherronism/repeatmasker/meta.yml
index eb15048..8adeb55 100644
--- a/modules/kherronism/repeatmasker/meta.yml
+++ b/modules/kherronism/repeatmasker/meta.yml
@@ -44,4 +44,3 @@ output:
 
 authors:
   - "@kherronism"
-  - "@gallvp"

From 2dda7529b9000a590d9d30f9fbf40d32c85d1a2d Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 22 Nov 2023 15:04:06 +1300
Subject: [PATCH 20/59] Updated braker3

---
 modules/kherronism/braker3/main.nf | 37 ++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/modules/kherronism/braker3/main.nf b/modules/kherronism/braker3/main.nf
index e5cc77c..d44c986 100644
--- a/modules/kherronism/braker3/main.nf
+++ b/modules/kherronism/braker3/main.nf
@@ -4,17 +4,22 @@ process BRAKER3 {
 
     conda "bioconda::braker3=3.0.3"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'registry.hub.docker.com/teambraker/braker3:v.1.0.4':
-        'registry.hub.docker.com/teambraker/braker3:v.1.0.4' }"
+        'registry.hub.docker.com/teambraker/braker3:v.1.0.6':
+        'registry.hub.docker.com/teambraker/braker3:v.1.0.6' }"
 
     input:
-    tuple val(meta), path(fasta), path(rnaseq_sets_ids), path(rnaseq_sets_dirs), path(bam), path(proteins), path(hintsfile)
+    tuple val(meta), path(fasta)
+    path bam
+    path rnaseq_sets_dirs
+    path rnaseq_sets_ids
+    path proteins
+    path hintsfile
 
     output:
     tuple val(meta), path("${prefix}/braker.gtf")      , emit: gtf
     tuple val(meta), path("${prefix}/braker.codingseq"), emit: cds
     tuple val(meta), path("${prefix}/braker.aa")       , emit: aa
-    tuple val(meta), path("${prefix}/hintsfile.gff")   , emit: hintsfile
+    tuple val(meta), path("${prefix}/hintsfile.gff")   , emit: hintsfile, optional: true
     tuple val(meta), path("${prefix}/braker.log")      , emit: log
     tuple val(meta), path("${prefix}/what-to-cite.txt"), emit: citations
     tuple val(meta), path("${prefix}/braker.gff3")     , emit: gff3     , optional: true
@@ -33,10 +38,13 @@ process BRAKER3 {
     def proteins = proteins ? "--prot_seq=${proteins}" : ''
     def hints    = hintsfile ? "--hints=${hintsfile}" : ''
     """
+    cp -r /usr/share/augustus/config augustus_config
+
     braker.pl \\
         --genome ${fasta} \\
         --species ${prefix} \\
         --workingdir ${prefix} \\
+        --AUGUSTUS_CONFIG_PATH "\$(pwd)/augustus_config" \\
         --threads ${task.cpus} \\
         ${rna_ids} \\
         ${rna_dirs} \\
@@ -47,7 +55,26 @@ process BRAKER3 {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-         braker3: \$(braker.pl --version 2>&1 | sed 's/^.*BRAKER3 v//; s/ .*\$//')
+        braker3: \$(braker.pl --version 2>/dev/null | sed 's/braker.pl version//')
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    def createHints = (rna_ids || bam || proteins || hints) ? "touch ${prefix}/hintsfile.gff" : ''
+    """
+    mkdir "$prefix"
+
+    touch "${prefix}/braker.gtf"
+    touch "${prefix}/braker.codingseq"
+    touch "${prefix}/braker.aa"
+    $createHints
+    touch "${prefix}/braker.log"
+    touch "${prefix}/what-to-cite.txt"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        braker3: \$(braker.pl --version 2>/dev/null | sed 's/braker.pl version//')
     END_VERSIONS
     """
 }

From 8efa34dcc99a5932cfb7e0ec2927217c6917c0af Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 22 Nov 2023 15:27:07 +1300
Subject: [PATCH 21/59] Updated repeatmasker

---
 modules/kherronism/repeatmasker/main.nf | 19 ++++++++++++++++++-
 pangene_pfr.sh                          |  1 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/modules/kherronism/repeatmasker/main.nf b/modules/kherronism/repeatmasker/main.nf
index 6abf0b9..fdab29e 100644
--- a/modules/kherronism/repeatmasker/main.nf
+++ b/modules/kherronism/repeatmasker/main.nf
@@ -8,7 +8,8 @@ process REPEATMASKER {
         'biocontainers/repeatmasker:4.1.5--pl5321hdfd78af_0' }"
 
     input:
-    tuple val(meta), path(fasta), path(lib)
+    tuple val(meta), path(fasta)
+    path(lib)
 
     output:
     tuple val(meta), path("${meta.id}/*.f*a.masked") , emit: fasta_masked
@@ -39,4 +40,20 @@ process REPEATMASKER {
         repeatmasker: ${VERSION}
     END_VERSIONS
     """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '4.1.5'  // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    """
+    mkdir "$meta.id"
+
+    touch "${meta.id}/${meta.id}.fasta.masked"
+    touch "${meta.id}/${meta.id}.fasta.out"
+    touch "${meta.id}/${meta.id}.fasta.tbl"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        repeatmasker: ${VERSION}
+    END_VERSIONS
+    """
 }
diff --git a/pangene_pfr.sh b/pangene_pfr.sh
index 785199e..3b048df 100644
--- a/pangene_pfr.sh
+++ b/pangene_pfr.sh
@@ -14,5 +14,6 @@ ml apptainer/1.1
 ml nextflow/23.04.4
 
 export APPTAINER_BINDPATH="$APPTAINER_BINDPATH,/workspace/$USER/tmp:/tmp"
+export TMPDIR="/workspace/$USER/tmp"
 
 nextflow main.nf -profile slurm -resume
\ No newline at end of file

From 8d31976d3315c9023617d02160693ecebf3bbed8 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 23 Nov 2023 11:08:28 +1300
Subject: [PATCH 22/59] Updated modules

---
 modules.json                                  |  2 +-
 modules/nf-core/sortmerna/tests/main.nf.test  | 59 +++++++++++++++++++
 .../nf-core/sortmerna/tests/main.nf.test.snap | 49 +++++++++++++++
 modules/nf-core/sortmerna/tests/tags.yml      |  2 +
 4 files changed, 111 insertions(+), 1 deletion(-)
 create mode 100644 modules/nf-core/sortmerna/tests/main.nf.test
 create mode 100644 modules/nf-core/sortmerna/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/sortmerna/tests/tags.yml

diff --git a/modules.json b/modules.json
index 6c14afd..0510115 100644
--- a/modules.json
+++ b/modules.json
@@ -84,7 +84,7 @@
                     },
                     "sortmerna": {
                         "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
                         "installed_by": [
                             "modules"
                         ]
diff --git a/modules/nf-core/sortmerna/tests/main.nf.test b/modules/nf-core/sortmerna/tests/main.nf.test
new file mode 100644
index 0000000..3ec2692
--- /dev/null
+++ b/modules/nf-core/sortmerna/tests/main.nf.test
@@ -0,0 +1,59 @@
+nextflow_process {
+
+    name "Test Process SORTMERNA"
+    script "../main.nf"
+    process "SORTMERNA"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "sortmerna"
+
+    test("sarscov2 single_end") {
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test', single_end:true ], // meta map
+                             [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+                           ]
+                input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.reads).match("se_reads") },
+                { assert process.out.log },
+                { assert snapshot(process.out.versions).match("se_versions") }
+            )
+        }
+
+    }
+
+    test("sarscov2 paired_end") {
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test', single_end:false ], // meta map
+                             [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                               file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+                           ]
+                input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.reads).match("pe_reads") },
+                { assert process.out.log },
+                { assert snapshot(process.out.versions).match("pe_versions") }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/sortmerna/tests/main.nf.test.snap b/modules/nf-core/sortmerna/tests/main.nf.test.snap
new file mode 100644
index 0000000..f1bedb7
--- /dev/null
+++ b/modules/nf-core/sortmerna/tests/main.nf.test.snap
@@ -0,0 +1,49 @@
+{
+    "se_versions": {
+        "content": [
+            [
+                "versions.yml:md5,96553a18cad5237fbf76d5a6c966360e"
+            ]
+        ],
+        "timestamp": "2023-11-22T14:25:07.95908694"
+    },
+    "pe_reads": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "test_1.non_rRNA.fastq.gz:md5,e62ff0123a74adfc6903d59a449cbdb0",
+                        "test_2.non_rRNA.fastq.gz:md5,e62ff0123a74adfc6903d59a449cbdb0"
+                    ]
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-22T14:25:19.098771475"
+    },
+    "se_reads": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.non_rRNA.fastq.gz:md5,e62ff0123a74adfc6903d59a449cbdb0"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-22T14:25:07.949212892"
+    },
+    "pe_versions": {
+        "content": [
+            [
+                "versions.yml:md5,96553a18cad5237fbf76d5a6c966360e"
+            ]
+        ],
+        "timestamp": "2023-11-22T14:25:19.105098985"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/sortmerna/tests/tags.yml b/modules/nf-core/sortmerna/tests/tags.yml
new file mode 100644
index 0000000..e088480
--- /dev/null
+++ b/modules/nf-core/sortmerna/tests/tags.yml
@@ -0,0 +1,2 @@
+sortmerna:
+  - modules/nf-core/sortmerna/**

From 5eaa87b2ca3c0b824b48d5e694a57bb4aca568ca Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 23 Nov 2023 12:21:45 +1300
Subject: [PATCH 23/59] Imported fastavalidate and liftoff from pfr/nxf-modules

---
 modules.json                                  | 20 +++++++
 modules/local/fasta_validate/main.nf          | 43 --------------
 .../main.nf => validate_params.nf}            |  0
 modules/pfr/fastavalidate/main.nf             | 56 ++++++++++++++++++
 modules/pfr/fastavalidate/meta.yml            | 49 +++++++++++++++
 modules/{local => pfr}/liftoff/main.nf        | 33 ++++++-----
 modules/pfr/liftoff/meta.yml                  | 59 +++++++++++++++++++
 7 files changed, 201 insertions(+), 59 deletions(-)
 delete mode 100644 modules/local/fasta_validate/main.nf
 rename modules/local/{validate_params/main.nf => validate_params.nf} (100%)
 create mode 100644 modules/pfr/fastavalidate/main.nf
 create mode 100644 modules/pfr/fastavalidate/meta.yml
 rename modules/{local => pfr}/liftoff/main.nf (56%)
 create mode 100644 modules/pfr/liftoff/meta.yml

diff --git a/modules.json b/modules.json
index 0510115..69df7af 100644
--- a/modules.json
+++ b/modules.json
@@ -2,6 +2,26 @@
     "name": "PlantandFoodResearch/pangene",
     "homePage": "https://github.com/PlantandFoodResearch/pangene",
     "repos": {
+        "git@github.com:PlantandFoodResearch/nxf-modules.git": {
+            "modules": {
+                "pfr": {
+                    "fastavalidate": {
+                        "branch": "main",
+                        "git_sha": "5189302ed5fcbb927689a89812c0f792622c35d2",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "liftoff": {
+                        "branch": "main",
+                        "git_sha": "14fd0a73898339bede7ae7bc14077a47c847c5b5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    }
+                }
+            }
+        },
         "git@github.com:kherronism/nf-modules.git": {
             "modules": {
                 "kherronism": {
diff --git a/modules/local/fasta_validate/main.nf b/modules/local/fasta_validate/main.nf
deleted file mode 100644
index 7c37c39..0000000
--- a/modules/local/fasta_validate/main.nf
+++ /dev/null
@@ -1,43 +0,0 @@
-process FASTA_VALIDATE {
-    tag "$meta.id"
-    label "process_single"
-
-    container "docker://gallvp/fasta_validator:a6a2ec1_ps"
-
-    input:
-    tuple val(meta), path(fasta_file)
-    
-    output:
-    tuple val(meta), path("$validFasta")    , emit: valid_fasta
-    path "versions.yml"                     , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    validFasta = (fasta_file.toString() - ~/\.\w+$/) + ".validated.fasta"
-    """
-    fasta_validate -v $fasta_file >/dev/null
-
-    # If invalid, the above command will fail and
-    # the NXF error startegy will kick in.
-    
-    cat $fasta_file > $validFasta
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        fasta_validate: \$(md5sum \$(which fasta_validate) | cut -d' ' -f1)
-    END_VERSIONS
-    """
-    
-    stub:
-    validFasta = (fasta_file.toString() - ~/\.\w+$/) + ".validated.fasta"
-    """
-    touch $validFasta
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        fasta_validate: \$(md5sum \$(which fasta_validate) | cut -d' ' -f1)
-    END_VERSIONS
-    """
-}
\ No newline at end of file
diff --git a/modules/local/validate_params/main.nf b/modules/local/validate_params.nf
similarity index 100%
rename from modules/local/validate_params/main.nf
rename to modules/local/validate_params.nf
diff --git a/modules/pfr/fastavalidate/main.nf b/modules/pfr/fastavalidate/main.nf
new file mode 100644
index 0000000..873983b
--- /dev/null
+++ b/modules/pfr/fastavalidate/main.nf
@@ -0,0 +1,56 @@
+process FASTAVALIDATE {
+    tag "$meta.id"
+    label 'process_single'
+
+    // conda "YOUR-TOOL-HERE"
+    // container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+    //     'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
+    //     'biocontainers/YOUR-TOOL-HERE' }"
+    container 'docker://gallvp/fasta_validator:a6a2ec1_ps'
+
+    input:
+    tuple val(meta), path(fasta)
+    
+    output:
+    tuple val(meta), path('*.validated.fasta')  , emit: valid_fasta , optional: true
+    tuple val(meta), path('*.error.log')        , emit: error_log   , optional: true
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    fasta_validate \\
+        -v $fasta \\
+        2> "${prefix}.error.log" \\
+        || echo "Errors from fasta_validate printed to ${prefix}.error.log"
+    
+    if [ \$(cat "${prefix}.error.log" | wc -l) -gt 0 ]; then
+        echo "Validation failed..."
+        cat "${prefix}.error.log"
+    else
+        rm "${prefix}.error.log"
+        
+        cat $fasta \\
+            > "${prefix}.validated.fasta"
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fasta_validate: \$(md5sum \$(which fasta_validate) | cut -d' ' -f1)
+    END_VERSIONS
+    """
+    
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch "${prefix}.validated.fasta"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fasta_validate: \$(md5sum \$(which fasta_validate) | cut -d' ' -f1)
+    END_VERSIONS
+    """
+}
diff --git a/modules/pfr/fastavalidate/meta.yml b/modules/pfr/fastavalidate/meta.yml
new file mode 100644
index 0000000..ff63b88
--- /dev/null
+++ b/modules/pfr/fastavalidate/meta.yml
@@ -0,0 +1,49 @@
+name: "fastavalidate"
+description: |
+  "A simple validator for fasta files. The module emits the validated file or an
+  error log upon validation failure."
+keywords:
+  - fasta
+  - validation
+  - genome
+tools:
+  - fasta_validate:
+      description: |
+        "A simple C code to validate a fasta file. It only checks a few things,
+        and by default only sets its response via the return code,
+        so you will need to check that!"
+      homepage: "https://github.com/gallvp/fasta_validator"
+      documentation: "https://github.com/gallvp/fasta_validator"
+      tool_dev_url: "https://github.com/gallvp/fasta_validator"
+      doi: "10.5281/zenodo.2532044"
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing file information
+        e.g. [ id:'test' ]
+  - fasta:
+      type: file
+      description: Input fasta file
+      pattern: "*.fasta"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing file information
+        e.g. [ id:'test' ]
+  - valid_fasta:
+      type: file
+      description: Validated fasta file if the validation succeeds
+      pattern: "*.validated.fasta"
+  - error_log:
+      type: file
+      description: Error log if the validation fails
+      pattern: "*.error.log"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@gallvp"
diff --git a/modules/local/liftoff/main.nf b/modules/pfr/liftoff/main.nf
similarity index 56%
rename from modules/local/liftoff/main.nf
rename to modules/pfr/liftoff/main.nf
index e10374d..5356728 100644
--- a/modules/local/liftoff/main.nf
+++ b/modules/pfr/liftoff/main.nf
@@ -1,13 +1,16 @@
 process LIFTOFF {
     tag "$meta.id"
-    label "process_high"
+    label 'process_high'
 
-    container 'https://depot.galaxyproject.org/singularity/liftoff:1.6.3--pyhdfd78af_0'
+    conda "bioconda::liftoff=1.6.3"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/liftoff:1.6.3--pyhdfd78af_0':
+        'biocontainers/liftoff:1.6.3--pyhdfd78af_0' }"
 
     input:
     tuple val(meta), path(target_fa)
-    path ref_fa
-    path ref_gff
+    path ref_fa, name: 'liftoff_reference_assembly.fa' // To avoid name collisions betwen target_fa and ref_fa
+    path ref_annotation
     
     output:
     tuple val(meta), path("*.gff3")             , emit: gff3
@@ -23,18 +26,16 @@ process LIFTOFF {
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
     liftoff \\
-    -g $ref_gff \\
-    -p $task.cpus \\
-    -o "${prefix}.gff3" \\
-    -u "${prefix}.unmapped.txt" \\
-    $args \\
-    $target_fa \\
-    $ref_fa \\
-    2> liftoff.stderr
+        -g $ref_annotation \\
+        -p $task.cpus \\
+        -o "${prefix}.gff3" \\
+        -u "${prefix}.unmapped.txt" \\
+        $args \\
+        $target_fa \\
+        liftoff_reference_assembly.fa
 
-    [ -f "${prefix}.gff3_polished" ] \\
-    && mv "${prefix}.gff3_polished" "${prefix}.polished.gff3" \\
-    || echo "-polish is absent"
+    mv "${prefix}.gff3_polished" "${prefix}.polished.gff3" \\
+        || echo "-polish is absent"
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -53,4 +54,4 @@ process LIFTOFF {
         liftoff: \$(liftoff --version 2> /dev/null)
     END_VERSIONS
     """
-}
\ No newline at end of file
+}
diff --git a/modules/pfr/liftoff/meta.yml b/modules/pfr/liftoff/meta.yml
new file mode 100644
index 0000000..e859282
--- /dev/null
+++ b/modules/pfr/liftoff/meta.yml
@@ -0,0 +1,59 @@
+name: "liftoff"
+description: "Liftoff is a tool that accurately maps annotations in GFF or GTF between assemblies of the same, or closely-related species."
+keywords:
+  - genome
+  - annotation
+  - gff3
+  - gtf
+  - liftover
+tools:
+  - "liftoff":
+      description: "Liftoff is a tool that accurately maps annotations in GFF or GTF between assemblies of the same, or closely-related species."
+      homepage: "https://github.com/agshumate/Liftoff"
+      documentation: "https://github.com/agshumate/Liftoff"
+      tool_dev_url: "https://github.com/agshumate/Liftoff"
+      doi: "10.1093/bioinformatics/bty191"
+      licence: ["GPL v3 License"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test' ]`
+  - target_fa:
+      type: file
+      description: Target assembly in fasta format
+      pattern: "*.{fsa,fa,fasta}"
+  - ref_fa:
+      type: file
+      description: Reference assembly in fasta format
+      pattern: "*.{fsa,fa,fasta}"
+  - ref_annotation:
+      type: file
+      description: Reference assembly annotations in gtf or gff3 format
+      pattern: "*.{gtf,gff3}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test' ]`
+  - gff3:
+      type: file
+      description: Lifted annotations for the target assembly in gff3 format
+      pattern: "*.gff3"
+  - polished_gff3:
+      type: file
+      description: Polished lifted annotations for the target assembly in gff3 format
+      pattern: "*.polished.gff3"
+      optional: true
+  - unmapped_txt:
+      type: file
+      description: List of unmapped reference annotations
+      pattern: "*.unmapped.txt"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@gallvp"

From 0a96c7f1de7091d674b0397ad6ddec1739feab49 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 13 Dec 2023 12:21:15 +1300
Subject: [PATCH 24/59] Updated modules and subworkflows

---
 modules.json                                  | 255 +++---
 modules/nf-core/cat/cat/main.nf               |  12 +-
 modules/nf-core/cat/cat/tests/main.nf.test    |  26 +
 .../nf-core/fastavalidator/environment.yml    |   9 +
 modules/nf-core/fastavalidator/main.nf        |  62 ++
 .../fastavalidator}/meta.yml                  |  26 +-
 .../nf-core/fastavalidator/tests/main.nf.test |  60 ++
 .../fastavalidator/tests/main.nf.test.snap    |  76 ++
 modules/nf-core/fastavalidator/tests/tags.yml |   2 +
 modules/nf-core/fastp/main.nf                 |   6 +-
 modules/nf-core/fastqc/main.nf                |   4 +-
 modules/nf-core/fastqc/tests/main.nf.test     |  68 ++
 modules/nf-core/gffread/main.nf               |  10 +-
 modules/nf-core/gffread/meta.yml              |   6 +-
 modules/nf-core/gffread/tests/main.nf.test    |  33 +-
 .../nf-core/gffread/tests/main.nf.test.snap   |  39 +-
 modules/nf-core/gffread/tests/nextflow.config |   5 +
 modules/nf-core/samtools/cat/environment.yml  |   2 +-
 modules/nf-core/samtools/cat/main.nf          |   4 +-
 .../nf-core/samtools/cat/tests/main.nf.test   |  72 ++
 .../samtools/cat/tests/main.nf.test.snap      |  26 +
 modules/nf-core/samtools/cat/tests/tags.yml   |   2 +
 modules/nf-core/star/align/environment.yml    |   2 +-
 modules/nf-core/star/align/main.nf            |   4 +-
 modules/nf-core/star/align/tests/main.nf.test | 339 ++++++++
 .../star/align/tests/main.nf.test.snap        | 769 ++++++++++++++++++
 .../star/align/tests/nextflow.arriba.config   |  14 +
 .../nf-core/star/align/tests/nextflow.config  |  14 +
 .../align/tests/nextflow.starfusion.config    |  14 +
 modules/nf-core/star/align/tests/tags.yml     |   2 +
 .../star/genomegenerate/environment.yml       |   2 +-
 modules/nf-core/star/genomegenerate/main.nf   |   4 +-
 .../star/genomegenerate/tests/main.nf.test    |  38 +
 .../genomegenerate/tests/main.nf.test.snap    |  16 +
 .../star/genomegenerate/tests/tags.yml        |   2 +
 modules/nf-core/umitools/extract/main.nf      |   4 +-
 .../umitools/extract/tests/main.nf.test.snap  |   4 +-
 modules/pfr/fastavalidate/main.nf             |  56 --
 modules/pfr/liftoff/environment.yml           |   9 +
 modules/pfr/liftoff/main.nf                   |  14 +-
 modules/pfr/liftoff/meta.yml                  |   7 +-
 modules/pfr/liftoff/tests/main.nf.test        |  89 ++
 modules/pfr/liftoff/tests/main.nf.test.snap   |  23 +
 modules/pfr/liftoff/tests/nextflow.config     |   5 +
 modules/pfr/liftoff/tests/tags.yml            |   2 +
 .../tests/main.nf.test                        |  60 ++
 .../tests/main.nf.test.snap                   |  81 ++
 .../tests/tags.yml                            |   2 +
 48 files changed, 2129 insertions(+), 252 deletions(-)
 create mode 100644 modules/nf-core/fastavalidator/environment.yml
 create mode 100644 modules/nf-core/fastavalidator/main.nf
 rename modules/{pfr/fastavalidate => nf-core/fastavalidator}/meta.yml (52%)
 create mode 100644 modules/nf-core/fastavalidator/tests/main.nf.test
 create mode 100644 modules/nf-core/fastavalidator/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/fastavalidator/tests/tags.yml
 create mode 100644 modules/nf-core/gffread/tests/nextflow.config
 create mode 100644 modules/nf-core/samtools/cat/tests/main.nf.test
 create mode 100644 modules/nf-core/samtools/cat/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/samtools/cat/tests/tags.yml
 create mode 100644 modules/nf-core/star/align/tests/main.nf.test
 create mode 100644 modules/nf-core/star/align/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/star/align/tests/nextflow.arriba.config
 create mode 100644 modules/nf-core/star/align/tests/nextflow.config
 create mode 100644 modules/nf-core/star/align/tests/nextflow.starfusion.config
 create mode 100644 modules/nf-core/star/align/tests/tags.yml
 create mode 100644 modules/nf-core/star/genomegenerate/tests/main.nf.test
 create mode 100644 modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/star/genomegenerate/tests/tags.yml
 delete mode 100644 modules/pfr/fastavalidate/main.nf
 create mode 100644 modules/pfr/liftoff/environment.yml
 create mode 100644 modules/pfr/liftoff/tests/main.nf.test
 create mode 100644 modules/pfr/liftoff/tests/main.nf.test.snap
 create mode 100644 modules/pfr/liftoff/tests/nextflow.config
 create mode 100644 modules/pfr/liftoff/tests/tags.yml
 create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
 create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
 create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml

diff --git a/modules.json b/modules.json
index 69df7af..cde5635 100644
--- a/modules.json
+++ b/modules.json
@@ -1,148 +1,113 @@
 {
-    "name": "PlantandFoodResearch/pangene",
-    "homePage": "https://github.com/PlantandFoodResearch/pangene",
-    "repos": {
-        "git@github.com:PlantandFoodResearch/nxf-modules.git": {
-            "modules": {
-                "pfr": {
-                    "fastavalidate": {
-                        "branch": "main",
-                        "git_sha": "5189302ed5fcbb927689a89812c0f792622c35d2",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "liftoff": {
-                        "branch": "main",
-                        "git_sha": "14fd0a73898339bede7ae7bc14077a47c847c5b5",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    }
-                }
-            }
-        },
-        "git@github.com:kherronism/nf-modules.git": {
-            "modules": {
-                "kherronism": {
-                    "braker3": {
-                        "branch": "dev",
-                        "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "repeatmasker": {
-                        "branch": "dev",
-                        "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    }
-                }
-            }
-        },
-        "https://github.com/nf-core/modules.git": {
-            "modules": {
-                "nf-core": {
-                    "cat/cat": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "cat/fastq": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "custom/dumpsoftwareversions": {
-                        "branch": "master",
-                        "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "fastp": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "fastq_fastqc_umitools_fastp"
-                        ]
-                    },
-                    "fastqc": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules",
-                            "fastq_fastqc_umitools_fastp"
-                        ]
-                    },
-                    "gffread": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "gunzip": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "samtools/cat": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "sortmerna": {
-                        "branch": "master",
-                        "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "star/align": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "star/genomegenerate": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "umitools/extract": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "fastq_fastqc_umitools_fastp"
-                        ]
-                    }
-                }
-            },
-            "subworkflows": {
-                "nf-core": {
-                    "fastq_fastqc_umitools_fastp": {
-                        "branch": "master",
-                        "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
-                    }
-                }
-            }
+  "name": "PlantandFoodResearch/pangene",
+  "homePage": "https://github.com/PlantandFoodResearch/pangene",
+  "repos": {
+    "git@github.com:PlantandFoodResearch/nxf-modules.git": {
+      "modules": {
+        "pfr": {
+          "liftoff": {
+            "branch": "main",
+            "git_sha": "3593ec100c92b656204bf739a51d62fd44d81f6f",
+            "installed_by": ["modules"]
+          }
         }
+      }
+    },
+    "git@github.com:kherronism/nf-modules.git": {
+      "modules": {
+        "kherronism": {
+          "braker3": {
+            "branch": "dev",
+            "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
+            "installed_by": ["modules"]
+          },
+          "repeatmasker": {
+            "branch": "dev",
+            "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
+            "installed_by": ["modules"]
+          }
+        }
+      }
+    },
+    "https://github.com/nf-core/modules.git": {
+      "modules": {
+        "nf-core": {
+          "cat/cat": {
+            "branch": "master",
+            "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
+            "installed_by": ["modules"]
+          },
+          "cat/fastq": {
+            "branch": "master",
+            "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+            "installed_by": ["modules"]
+          },
+          "custom/dumpsoftwareversions": {
+            "branch": "master",
+            "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
+            "installed_by": ["modules"]
+          },
+          "fastavalidator": {
+            "branch": "master",
+            "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382",
+            "installed_by": ["modules"]
+          },
+          "fastp": {
+            "branch": "master",
+            "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde",
+            "installed_by": ["fastq_fastqc_umitools_fastp"]
+          },
+          "fastqc": {
+            "branch": "master",
+            "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
+            "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
+          },
+          "gffread": {
+            "branch": "master",
+            "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8",
+            "installed_by": ["modules"]
+          },
+          "gunzip": {
+            "branch": "master",
+            "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+            "installed_by": ["modules"]
+          },
+          "samtools/cat": {
+            "branch": "master",
+            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+            "installed_by": ["modules"]
+          },
+          "sortmerna": {
+            "branch": "master",
+            "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
+            "installed_by": ["modules"]
+          },
+          "star/align": {
+            "branch": "master",
+            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+            "installed_by": ["modules"]
+          },
+          "star/genomegenerate": {
+            "branch": "master",
+            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+            "installed_by": ["modules"]
+          },
+          "umitools/extract": {
+            "branch": "master",
+            "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
+            "installed_by": ["fastq_fastqc_umitools_fastp"]
+          }
+        }
+      },
+      "subworkflows": {
+        "nf-core": {
+          "fastq_fastqc_umitools_fastp": {
+            "branch": "master",
+            "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
+            "installed_by": ["subworkflows"]
+          }
+        }
+      }
     }
-}
\ No newline at end of file
+  }
+}
diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf
index 4264a92..970ab76 100644
--- a/modules/nf-core/cat/cat/main.nf
+++ b/modules/nf-core/cat/cat/main.nf
@@ -35,6 +35,10 @@ process CAT_CAT {
     in_zip   = file_list[0].endsWith('.gz')
     command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
     command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : ''
+    if(file_list.contains(prefix.trim())) {
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
     """
     $command1 \\
         $args \\
@@ -49,8 +53,12 @@ process CAT_CAT {
     """
 
     stub:
-    def file_list = files_in.collect { it.toString() }
-    prefix   = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}"
+    def file_list   = files_in.collect { it.toString() }
+    prefix          = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}"
+    if(file_list.contains(prefix.trim())) {
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
     """
     touch $prefix
 
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test
index 5766daa..ed5a4f1 100644
--- a/modules/nf-core/cat/cat/tests/main.nf.test
+++ b/modules/nf-core/cat/cat/tests/main.nf.test
@@ -8,6 +8,32 @@ nextflow_process {
     tag "cat"
     tag "cat/cat"
 
+    test("test_cat_name_conflict") { // expects CAT_CAT to fail and report the input/output name-conflict error
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'genome', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert !process.success },
+                { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }
+            )
+        }
+    }
+
     test("test_cat_unzipped_unzipped") {
         when {
             params {
diff --git a/modules/nf-core/fastavalidator/environment.yml b/modules/nf-core/fastavalidator/environment.yml
new file mode 100644
index 0000000..70f346e
--- /dev/null
+++ b/modules/nf-core/fastavalidator/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "fastavalidator"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::py_fasta_validator=0.6"
diff --git a/modules/nf-core/fastavalidator/main.nf b/modules/nf-core/fastavalidator/main.nf
new file mode 100644
index 0000000..ac5470f
--- /dev/null
+++ b/modules/nf-core/fastavalidator/main.nf
@@ -0,0 +1,62 @@
+process FASTAVALIDATOR { // runs py_fasta_validator on one FASTA file and emits either a success log or an error log
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/py_fasta_validator:0.6--py37h595c7a6_0':
+        'biocontainers/py_fasta_validator:0.6--py37h595c7a6_0' }"
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path('*.success.log')  , emit: success_log , optional: true // written only when the captured stderr has no lines
+    tuple val(meta), path('*.error.log')    , emit: error_log   , optional: true // left in place only when the captured stderr has lines
+    path "versions.yml"                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}" // log-file prefix; ext.prefix overrides the default of meta.id
+    """
+    py_fasta_validator \\
+        -f $fasta \\
+        2> "${prefix}.error.log" \\
+        || echo "Errors from fasta_validate printed to ${prefix}.error.log"
+
+    if [ \$(cat "${prefix}.error.log" | wc -l) -gt 0 ]; then
+        echo "Validation failed..."
+
+        cat \\
+            "${prefix}.error.log"
+    else
+        echo "Validation successful..."
+
+        mv \\
+            "${prefix}.error.log" \\
+            fasta_validate.stderr
+
+        echo "Validation successful..." \\
+            > "${prefix}.success.log"
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        py_fasta_validator: \$(py_fasta_validator -v | sed 's/.* version //')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}" // same prefix rule as the script block
+    """
+    echo "Validation successful..." \\
+        > "${prefix}.success.log"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        py_fasta_validator: \$(py_fasta_validator -v | sed 's/.* version //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/pfr/fastavalidate/meta.yml b/modules/nf-core/fastavalidator/meta.yml
similarity index 52%
rename from modules/pfr/fastavalidate/meta.yml
rename to modules/nf-core/fastavalidator/meta.yml
index ff63b88..c5c4371 100644
--- a/modules/pfr/fastavalidate/meta.yml
+++ b/modules/nf-core/fastavalidator/meta.yml
@@ -1,6 +1,8 @@
-name: "fastavalidate"
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "fastavalidator"
 description: |
-  "A simple validator for fasta files. The module emits the validated file or an
+  "Python C-extension for a simple validator for fasta files. The module emits a success log or an
   error log upon validation failure."
 keywords:
   - fasta
@@ -9,13 +11,13 @@ keywords:
 tools:
   - fasta_validate:
       description: |
-        "A simple C code to validate a fasta file. It only checks a few things,
+        "Python C-extension wrapping simple C code that validates a fasta file. It only checks a few things,
         and by default only sets its response via the return code,
         so you will need to check that!"
-      homepage: "https://github.com/gallvp/fasta_validator"
-      documentation: "https://github.com/gallvp/fasta_validator"
-      tool_dev_url: "https://github.com/gallvp/fasta_validator"
-      doi: "10.5281/zenodo.2532044"
+      homepage: "https://github.com/linsalrob/py_fasta_validator"
+      documentation: "https://github.com/linsalrob/py_fasta_validator"
+      tool_dev_url: "https://github.com/linsalrob/py_fasta_validator"
+      doi: "10.5281/zenodo.5002710"
       licence: ["MIT"]
 input:
   - meta:
@@ -33,13 +35,13 @@ output:
       description: |
         Groovy Map containing file information
         e.g. [ id:'test' ]
-  - valid_fasta:
+  - success_log:
       type: file
-      description: Validated fasta file if the validation succeeds
-      pattern: "*.validated.fasta"
+      description: Log file for successful validation
+      pattern: "*.success.log"
   - error_log:
       type: file
-      description: Error log if the validation fails
+      description: Log file for failed validation
       pattern: "*.error.log"
   - versions:
       type: file
@@ -47,3 +49,5 @@ output:
       pattern: "versions.yml"
 authors:
   - "@gallvp"
+maintainers:
+  - "@gallvp"
diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test b/modules/nf-core/fastavalidator/tests/main.nf.test
new file mode 100644
index 0000000..bb8c22c
--- /dev/null
+++ b/modules/nf-core/fastavalidator/tests/main.nf.test
@@ -0,0 +1,60 @@
+nextflow_process { // nf-test suite for the FASTAVALIDATOR process defined in ../main.nf
+
+    name "Test Process FASTAVALIDATOR"
+    script "../main.nf"
+    process "FASTAVALIDATOR"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "fastavalidator"
+
+    test("sarscov2-fasta-valid") { // FASTA input: expects a success log and an empty error_log channel
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert process.out.success_log != null },
+                { assert process.out.error_log == [] },
+                { assert path(process.out.success_log.get(0).get(1)).getText().contains("Validation successful...") }
+            )
+        }
+
+    }
+
+    test("sarscov2-gff3-invalid") { // GFF3 passed as the fasta input: expects an error log and an empty success_log channel
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success }, // the process itself still succeeds; the failure is reported via error_log
+                { assert snapshot(process.out).match() },
+                { assert process.out.success_log == [] },
+                { assert process.out.error_log != null },
+                { assert path(process.out.error_log.get(0).get(1)).getText().contains("genome.gff3 does not start with a >") }
+            )
+        }
+
+    }
+}
diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test.snap b/modules/nf-core/fastavalidator/tests/main.nf.test.snap
new file mode 100644
index 0000000..382dee7
--- /dev/null
+++ b/modules/nf-core/fastavalidator/tests/main.nf.test.snap
@@ -0,0 +1,76 @@
+{
+    "sarscov2-fasta-valid": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.success.log:md5,b0b859eda1db5cd43915846e00ebc22c"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7"
+                ],
+                "error_log": [
+                    
+                ],
+                "success_log": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.success.log:md5,b0b859eda1db5cd43915846e00ebc22c"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7"
+                ]
+            }
+        ],
+        "timestamp": "2023-11-28T11:23:25.106872"
+    },
+    "sarscov2-gff3-invalid": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.error.log:md5,531d520c0e7767176f743f197f1f87b3"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7"
+                ],
+                "error_log": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.error.log:md5,531d520c0e7767176f743f197f1f87b3"
+                    ]
+                ],
+                "success_log": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7"
+                ]
+            }
+        ],
+        "timestamp": "2023-11-28T11:23:29.40324"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastavalidator/tests/tags.yml b/modules/nf-core/fastavalidator/tests/tags.yml
new file mode 100644
index 0000000..c3c7757
--- /dev/null
+++ b/modules/nf-core/fastavalidator/tests/tags.yml
@@ -0,0 +1,2 @@
+fastavalidator:
+  - "modules/nf-core/fastavalidator/**"
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
index c8e815a..5fac3c1 100644
--- a/modules/nf-core/fastp/main.nf
+++ b/modules/nf-core/fastp/main.nf
@@ -45,7 +45,7 @@ process FASTP {
             $adapter_list \\
             $fail_fastq \\
             $args \\
-            2> ${prefix}.fastp.log \\
+            2> >(tee ${prefix}.fastp.log >&2) \\
         | gzip -c > ${prefix}.fastp.fastq.gz
 
         cat <<-END_VERSIONS > versions.yml
@@ -66,7 +66,7 @@ process FASTP {
             $adapter_list \\
             $fail_fastq \\
             $args \\
-            2> ${prefix}.fastp.log
+            2> >(tee ${prefix}.fastp.log >&2)
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
@@ -91,7 +91,7 @@ process FASTP {
             --thread $task.cpus \\
             --detect_adapter_for_pe \\
             $args \\
-            2> ${prefix}.fastp.log
+            2> >(tee ${prefix}.fastp.log >&2)
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
index 50e59f2..9e19a74 100644
--- a/modules/nf-core/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -37,7 +37,7 @@ process FASTQC {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
     END_VERSIONS
     """
 
@@ -49,7 +49,7 @@ process FASTQC {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
     END_VERSIONS
     """
 }
diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test
index 6437a14..b9e8f92 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test
+++ b/modules/nf-core/fastqc/tests/main.nf.test
@@ -38,4 +38,72 @@ nextflow_process {
             )
         }
     }
+// TODO: convert the commented-out legacy test workflows below into nf-test cases
+// //
+// // Test with paired-end data
+// //
+// workflow test_fastqc_paired_end {
+//     input = [
+//                 [id: 'test', single_end: false], // meta map
+//                 [
+//                     file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+//                     file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+//                 ]
+//             ]
+
+//     FASTQC ( input )
+// }
+
+// //
+// // Test with interleaved data
+// //
+// workflow test_fastqc_interleaved {
+//     input = [
+//                 [id: 'test', single_end: false], // meta map
+//                 file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true)
+//             ]
+
+//     FASTQC ( input )
+// }
+
+// //
+// // Test with bam data
+// //
+// workflow test_fastqc_bam {
+//     input = [
+//                 [id: 'test', single_end: false], // meta map
+//                 file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+//             ]
+
+//     FASTQC ( input )
+// }
+
+// //
+// // Test with multiple samples
+// //
+// workflow test_fastqc_multiple {
+//     input = [
+//                 [id: 'test', single_end: false], // meta map
+//                 [
+//                     file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+//                     file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
+//                     file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
+//                     file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true)
+//                 ]
+//             ]
+
+//     FASTQC ( input )
+// }
+
+// //
+// // Test with custom prefix
+// //
+// workflow test_fastqc_custom_prefix {
+//     input = [
+//                 [ id:'mysample', single_end:true ], // meta map
+//                 file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+//             ]
+
+//     FASTQC ( input )
+// }
 }
diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf
index 68f8045..d8a473e 100644
--- a/modules/nf-core/gffread/main.nf
+++ b/modules/nf-core/gffread/main.nf
@@ -11,20 +11,22 @@ process GFFREAD {
     path gff
 
     output:
-    path "*.gtf"        , emit: gtf
+    path "*.gtf"        , emit: gtf         , optional: true
+    path "*.gff3"       , emit: gffread_gff , optional: true
     path "versions.yml" , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args   = task.ext.args   ?: ''
-    def prefix = task.ext.prefix ?: "${gff.baseName}"
+    def args        = task.ext.args   ?: ''
+    def prefix      = task.ext.prefix ?: "${gff.baseName}"
+    def extension   = args.contains("-T") ? 'gtf' : 'gffread.gff3'
     """
     gffread \\
         $gff \\
         $args \\
-        -o ${prefix}.gtf
+        -o ${prefix}.${extension}
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         gffread: \$(gffread --version 2>&1)
diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml
index f486f8b..27ac310 100644
--- a/modules/nf-core/gffread/meta.yml
+++ b/modules/nf-core/gffread/meta.yml
@@ -20,8 +20,12 @@ input:
 output:
   - gtf:
       type: file
-      description: GTF file resulting from the conversion of the GFF input file
+      description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present
       pattern: "*.{gtf}"
+  - gffread_gff:
+      type: file
+      description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent
+      pattern: "*.{gff3}"
   - versions:
       type: file
       description: File containing software versions
diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test
index 67d47ec..3c064b3 100644
--- a/modules/nf-core/gffread/tests/main.nf.test
+++ b/modules/nf-core/gffread/tests/main.nf.test
@@ -3,11 +3,38 @@ nextflow_process {
     name "Test Process GFFREAD"
     script "../main.nf"
     process "GFFREAD"
+
     tag "gffread"
     tag "modules_nfcore"
     tag "modules"
 
-    test("Should run without failures") {
+    test("sarscov2-gff3-gtf") { // runs with ./nextflow.config; expects a non-null gtf output and an empty gffread_gff
+
+        config "./nextflow.config"
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+                """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+            { assert snapshot(process.out).match() },
+            { assert process.out.gtf != null },
+            { assert process.out.gffread_gff == [] }
+            )
+        }
+
+    }
+
+    test("sarscov2-gff3-gff3") {
 
         when {
             params {
@@ -23,7 +50,9 @@ nextflow_process {
         then {
             assertAll (
             { assert process.success },
-            { assert snapshot(process.out).match() }
+            { assert snapshot(process.out).match() },
+            { assert process.out.gtf == [] },
+            { assert process.out.gffread_gff != null },
             )
         }
 
diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap
index fb5460c..1f1342e 100644
--- a/modules/nf-core/gffread/tests/main.nf.test.snap
+++ b/modules/nf-core/gffread/tests/main.nf.test.snap
@@ -1,21 +1,52 @@
 {
-    "Should run without failures": {
+    "sarscov2-gff3-gtf": {
         "content": [
             {
                 "0": [
-                    "genome.gtf:md5,f184f856b7fe3e159d21b052b5dd3954"
+                    "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3"
                 ],
                 "1": [
+                    
+                ],
+                "2": [
+                    "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+                ],
+                "gffread_gff": [
+                    
+                ],
+                "gtf": [
+                    "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3"
+                ],
+                "versions": [
+                    "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+                ]
+            }
+        ],
+        "timestamp": "2023-11-29T15:39:30.006985"
+    },
+    "sarscov2-gff3-gff3": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d"
+                ],
+                "2": [
                     "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
                 ],
+                "gffread_gff": [
+                    "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d"
+                ],
                 "gtf": [
-                    "genome.gtf:md5,f184f856b7fe3e159d21b052b5dd3954"
+                    
                 ],
                 "versions": [
                     "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
                 ]
             }
         ],
-        "timestamp": "2023-10-17T10:00:08.542490523"
+        "timestamp": "2023-11-29T15:39:34.636061"
     }
 }
\ No newline at end of file
diff --git a/modules/nf-core/gffread/tests/nextflow.config b/modules/nf-core/gffread/tests/nextflow.config
new file mode 100644
index 0000000..74b2509
--- /dev/null
+++ b/modules/nf-core/gffread/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: GFFREAD { // selector: the settings below apply only to the GFFREAD process
+        ext.args = '-T' // NOTE(review): presumably makes gffread emit GTF rather than GFF3 — confirm against gffread usage
+    }
+}
diff --git a/modules/nf-core/samtools/cat/environment.yml b/modules/nf-core/samtools/cat/environment.yml
index 80da1cf..0455a7d 100644
--- a/modules/nf-core/samtools/cat/environment.yml
+++ b/modules/nf-core/samtools/cat/environment.yml
@@ -4,4 +4,4 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - bioconda::samtools=1.17
+  - bioconda::samtools=1.18
diff --git a/modules/nf-core/samtools/cat/main.nf b/modules/nf-core/samtools/cat/main.nf
index 5d939aa..b3b2508 100644
--- a/modules/nf-core/samtools/cat/main.nf
+++ b/modules/nf-core/samtools/cat/main.nf
@@ -4,8 +4,8 @@ process SAMTOOLS_CAT {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
-        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+        'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' :
+        'biocontainers/samtools:1.18--h50ea8bc_1' }"
 
     input:
     tuple val(meta),  path(input_files, stageAs: "?/*")
diff --git a/modules/nf-core/samtools/cat/tests/main.nf.test b/modules/nf-core/samtools/cat/tests/main.nf.test
new file mode 100644
index 0000000..49c633f
--- /dev/null
+++ b/modules/nf-core/samtools/cat/tests/main.nf.test
@@ -0,0 +1,72 @@
+nextflow_process {
+
+    name "Test Process SAMTOOLS_CAT"
+    script "../main.nf"
+    process "SAMTOOLS_CAT"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "samtools"
+    tag "samtools/cat"
+
+    test("sarscov2 - [bam1, bam2]") {
+        // Feeds two sarscov2 BAMs (paired-end and unaligned) to SAMTOOLS_CAT as a single [meta, files] tuple.
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['illumina']['test_unaligned_bam'], checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+        // Only the BAM's file name goes into the snapshot (not its checksum); the cram and versions channels are snapshotted whole.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.bam[0][1]).name,
+                    process.out.cram,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - [bam1, bam2] - stub") {
+        // Stub variant: identical inputs and assertions to the non-stub test, but run with the "-stub" option.
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['illumina']['test_unaligned_bam'], checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+        // Only the BAM's file name goes into the snapshot (not its checksum); the cram and versions channels are snapshotted whole.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.bam[0][1]).name,
+                    process.out.cram,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/samtools/cat/tests/main.nf.test.snap b/modules/nf-core/samtools/cat/tests/main.nf.test.snap
new file mode 100644
index 0000000..298e25d
--- /dev/null
+++ b/modules/nf-core/samtools/cat/tests/main.nf.test.snap
@@ -0,0 +1,26 @@
+{
+    "sarscov2 - [bam1, bam2]": {
+        "content": [
+            "test.bam",
+            [
+                
+            ],
+            [
+                "versions.yml:md5,f10a4f6b2e0272bef2ceb4ca826a15a1"
+            ]
+        ],
+        "timestamp": "2023-12-04T14:00:18.264348819"
+    },
+    "sarscov2 - [bam1, bam2] - stub": {
+        "content": [
+            "test.bam",
+            [
+                
+            ],
+            [
+                "versions.yml:md5,f10a4f6b2e0272bef2ceb4ca826a15a1"
+            ]
+        ],
+        "timestamp": "2023-12-04T14:03:17.714482742"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/cat/tests/tags.yml b/modules/nf-core/samtools/cat/tests/tags.yml
new file mode 100644
index 0000000..9760557
--- /dev/null
+++ b/modules/nf-core/samtools/cat/tests/tags.yml
@@ -0,0 +1,2 @@
+samtools/cat:
+  - "modules/nf-core/samtools/cat/**"
diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml
index 6db2098..36fcd02 100644
--- a/modules/nf-core/star/align/environment.yml
+++ b/modules/nf-core/star/align/environment.yml
@@ -5,5 +5,5 @@ channels:
   - defaults
 dependencies:
   - bioconda::star=2.7.10a
-  - bioconda::samtools=1.16.1
+  - bioconda::samtools=1.18
   - conda-forge::gawk=5.1.0
diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf
index fa645a6..8e9c48b 100644
--- a/modules/nf-core/star/align/main.nf
+++ b/modules/nf-core/star/align/main.nf
@@ -4,8 +4,8 @@ process STAR_ALIGN {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' :
-        'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }"
+        'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' :
+        'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }"
 
     input:
     tuple val(meta), path(reads, stageAs: "input*/*")
diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test
new file mode 100644
index 0000000..4c87847
--- /dev/null
+++ b/modules/nf-core/star/align/tests/main.nf.test
@@ -0,0 +1,339 @@
+nextflow_process {
+
+    name "Test Process STAR_ALIGN"
+    script "../main.nf"
+    process "STAR_ALIGN"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "star"
+    tag "star/align"
+
+    test("homo_sapiens - single_end") {
+        config "./nextflow.config"
+        // Setup: run STAR_GENOMEGENERATE on the homo_sapiens FASTA and GTF; its index output is passed to STAR_ALIGN as input[1].
+        setup {
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+                    ])
+                    """
+                }
+            }
+        }
+        // Single-end case: one FASTQ. input[3]-input[5] are plain values; their meaning is defined by STAR_ALIGN's inputs (not shown here).
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:true ], // meta map
+                    [ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true) ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+        // log_final and log_out are compared by file name only; every other output channel is snapshotted whole, each under its own key.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - single_end - log_final") },
+                { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - single_end - log_out") },
+                { assert snapshot(process.out.bam).match("homo_sapiens - single_end - bam") },
+                { assert snapshot(process.out.bam_sorted).match("homo_sapiens - single_end - bam_sorted") },
+                { assert snapshot(process.out.bam_transcript).match("homo_sapiens - single_end - bam_transcript") },
+                { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - single_end - bam_unsorted") },
+                { assert snapshot(process.out.bedgraph).match("homo_sapiens - single_end - bedgraph") },
+                { assert snapshot(process.out.fastq).match("homo_sapiens - single_end - fastq") },
+                { assert snapshot(process.out.junction).match("homo_sapiens - single_end - junction") },
+                { assert snapshot(process.out.log_progress).match("homo_sapiens - single_end - log_progress") },
+                { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - single_end - read_per_gene_tab") },
+                { assert snapshot(process.out.sam).match("homo_sapiens - single_end - sam") },
+                { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - single_end - spl_junc_tab") },
+                { assert snapshot(process.out.tab).match("homo_sapiens - single_end - tab") },
+                { assert snapshot(process.out.wig).match("homo_sapiens - single_end - wig") },
+                { assert snapshot(process.out.versions).match("homo_sapiens - single_end - versions") }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end") {
+        config "./nextflow.config"
+        // Setup: run STAR_GENOMEGENERATE on the homo_sapiens FASTA and GTF; its index output is passed to STAR_ALIGN as input[1].
+        setup {
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+                    ])
+                    """
+                }
+            }
+        }
+        // Paired-end case: the _1 and _2 FASTQs travel in one tuple. input[3]-input[5] are plain values defined by STAR_ALIGN's inputs (not shown here).
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
+                    ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+        // log_final and log_out are compared by file name only; every other output channel is snapshotted whole, each under its own key.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - log_final") },
+                { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - log_out") },
+                { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - bam") },
+                { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - bam_sorted") },
+                { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - bam_transcript") },
+                { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - bam_unsorted") },
+                { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - bedgraph") },
+                { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - fastq") },
+                { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - junction") },
+                { assert snapshot(process.out.log_progress).match("homo_sapiens - paired_end - log_progress") },
+                { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - read_per_gene_tab") },
+                { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - sam") },
+                { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - spl_junc_tab") },
+                { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - tab") },
+                { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - wig") },
+                { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - versions") }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end - arriba") {
+        config "./nextflow.arriba.config"
+        // Runs under ./nextflow.arriba.config (contents not shown here). Setup builds the STAR index that is passed as input[1].
+        setup {
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+                    ])
+                    """
+                }
+            }
+        }
+        // Paired-end case: the _1 and _2 FASTQs travel in one tuple. input[3]-input[5] are plain values defined by STAR_ALIGN's inputs (not shown here).
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
+                    ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+        // log_final, log_out and log_progress are compared by file name only; every other output channel is snapshotted whole.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - arriba - log_final") },
+                { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - arriba - log_out") },
+                { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - arriba - log_progress") },
+                { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - arriba - bam") },
+                { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - arriba - bam_sorted") },
+                { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - arriba - bam_transcript") },
+                { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - arriba - bam_unsorted") },
+                { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - arriba - bedgraph") },
+                { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - arriba - fastq") },
+                { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - arriba - junction") },
+                { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - arriba - read_per_gene_tab") },
+                { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - arriba - sam") },
+                { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - arriba - spl_junc_tab") },
+                { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - arriba - tab") },
+                { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - arriba - wig") },
+                { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - arriba - versions") }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end - starfusion") {
+        config "./nextflow.starfusion.config"
+        // Runs under ./nextflow.starfusion.config (contents not shown here). Setup builds the STAR index that is passed as input[1].
+        setup {
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+                    ])
+                    """
+                }
+            }
+        }
+        // Paired-end case: the _1 and _2 FASTQs travel in one tuple. input[3]-input[5] are plain values defined by STAR_ALIGN's inputs (not shown here).
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
+                    ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+        // log_final, log_out and log_progress are compared by file name only; every other output channel is snapshotted whole.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_final") },
+                { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_out") },
+                { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_progress") },
+                { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - starfusion - bam") },
+                { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - starfusion - bam_sorted") },
+                { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - starfusion - bam_transcript") },
+                { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - starfusion - bam_unsorted") },
+                { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - starfusion - bedgraph") },
+                { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - starfusion - fastq") },
+                { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - starfusion - junction") },
+                { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - starfusion - read_per_gene_tab") },
+                { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - starfusion - sam") },
+                { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - starfusion - spl_junc_tab") },
+                { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - starfusion - tab") },
+                { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - starfusion - wig") },
+                { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - starfusion - versions") }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end - multiple") {
+        config "./nextflow.config"
+        // Setup: run STAR_GENOMEGENERATE on the homo_sapiens FASTA and GTF; its index output is passed to STAR_ALIGN as input[1].
+        setup {
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+                    ])
+                    """
+                }
+            }
+        }
+        // Multiple-file case: the same _1/_2 FASTQ pair is listed twice, so the reads list holds four files.
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
+                    ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+        // log_final, log_out and log_progress are compared by file name only; every other output channel is snapshotted whole.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - multiple - log_final") },
+                { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - multiple - log_out") },
+                { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - multiple - log_progress") },
+                { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - multiple - bam") },
+                { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - multiple - bam_sorted") },
+                { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - multiple - bam_transcript") },
+                { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - multiple - bam_unsorted") },
+                { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - multiple - bedgraph") },
+                { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - multiple - fastq") },
+                { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - multiple - junction") },
+                { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - multiple - read_per_gene_tab") },
+                { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - multiple - sam") },
+                { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - multiple - spl_junc_tab") },
+                { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - multiple - tab") },
+                { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - multiple - wig") },
+                { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - multiple - versions") }
+            )
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap
new file mode 100644
index 0000000..08edb91
--- /dev/null
+++ b/modules/nf-core/star/align/tests/main.nf.test.snap
@@ -0,0 +1,769 @@
+{
+    "homo_sapiens - paired_end - multiple - bam_sorted": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T18:01:19.968225733"
+    },
+    "homo_sapiens - paired_end - multiple - wig": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:29:01.857804"
+    },
+    "homo_sapiens - paired_end - arriba - tab": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:56:12.347549723"
+    },
+    "homo_sapiens - single_end - wig": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:22:55.24701"
+    },
+    "homo_sapiens - paired_end - sam": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:23:33.383818"
+    },
+    "homo_sapiens - paired_end - arriba - versions": {
+        "content": [
+            [
+                "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+            ]
+        ],
+        "timestamp": "2023-12-04T17:56:12.431212643"
+    },
+    "homo_sapiens - paired_end - multiple - bedgraph": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a",
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6"
+                    ]
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T18:01:20.07119229"
+    },
+    "homo_sapiens - paired_end - read_per_gene_tab": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:23:33.368841"
+    },
+    "homo_sapiens - paired_end - arriba - bedgraph": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:25:07.102537"
+    },
+    "homo_sapiens - single_end - junction": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:22:55.185369"
+    },
+    "homo_sapiens - paired_end - arriba - spl_junc_tab": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:56:12.268388251"
+    },
+    "homo_sapiens - single_end - sam": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:22:55.216183"
+    },
+    "homo_sapiens - paired_end - fastq": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:23:33.327236"
+    },
+    "homo_sapiens - single_end - versions": {
+        "content": [
+            [
+                "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+            ]
+        ],
+        "timestamp": "2023-12-04T17:53:26.664210196"
+    },
+    "homo_sapiens - paired_end - multiple - log_out": {
+        "content": [
+            "test.Log.out"
+        ],
+        "timestamp": "2023-11-23T13:29:01.022176"
+    },
+    "homo_sapiens - paired_end - arriba - fastq": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:25:07.15277"
+    },
+    "homo_sapiens - paired_end - multiple - junction": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:29:01.52923"
+    },
+    "homo_sapiens - paired_end - multiple - spl_junc_tab": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T18:01:20.189486201"
+    },
+    "homo_sapiens - paired_end - starfusion - log_final": {
+        "content": [
+            "test.Log.final.out"
+        ],
+        "timestamp": "2023-11-23T13:27:55.905883"
+    },
+    "homo_sapiens - paired_end - starfusion - fastq": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:27:56.192302"
+    },
+    "homo_sapiens - paired_end - multiple - sam": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:29:01.661837"
+    },
+    "homo_sapiens - paired_end - multiple - log_final": {
+        "content": [
+            "test.Log.final.out"
+        ],
+        "timestamp": "2023-11-23T13:29:00.966417"
+    },
+    "homo_sapiens - paired_end - starfusion - bam": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Aligned.out.bam:md5,bcad07b838f6762fc01eea52b5cd3f84"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:59:58.53235164"
+    },
+    "homo_sapiens - paired_end - arriba - junction": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:25:07.202776"
+    },
+    "homo_sapiens - single_end - bedgraph": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    [
+                        "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a",
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f"
+                    ]
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:53:26.394863748"
+    },
+    "homo_sapiens - paired_end - arriba - read_per_gene_tab": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:25:07.251962"
+    },
+    "homo_sapiens - paired_end - starfusion - bam_sorted": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:27:56.040843"
+    },
+    "homo_sapiens - single_end - bam_unsorted": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:22:55.154172"
+    },
+    "homo_sapiens - paired_end - bam": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:54:11.934832258"
+    },
+    "homo_sapiens - paired_end - arriba - bam_transcript": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:25:06.998817"
+    },
+    "homo_sapiens - paired_end - log_out": {
+        "content": [
+            "test.Log.out"
+        ],
+        "timestamp": "2023-11-23T13:23:33.259699"
+    },
+    "homo_sapiens - paired_end - arriba - log_out": {
+        "content": [
+            "test.Log.out"
+        ],
+        "timestamp": "2023-11-23T13:25:06.849451"
+    },
+    "homo_sapiens - paired_end - multiple - versions": {
+        "content": [
+            [
+                "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+            ]
+        ],
+        "timestamp": "2023-12-04T18:01:20.393705142"
+    },
+    "homo_sapiens - paired_end - starfusion - bam_transcript": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:27:56.082408"
+    },
+    "homo_sapiens - paired_end - starfusion - tab": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:59:58.818041322"
+    },
+    "homo_sapiens - single_end - fastq": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:22:55.175307"
+    },
+    "homo_sapiens - paired_end - tab": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:54:12.255481058"
+    },
+    "homo_sapiens - paired_end - starfusion - bedgraph": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:27:56.155413"
+    },
+    "homo_sapiens - single_end - bam_transcript": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:22:55.144852"
+    },
+    "homo_sapiens - paired_end - versions": {
+        "content": [
+            [
+                "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+            ]
+        ],
+        "timestamp": "2023-12-04T17:54:12.343840482"
+    },
+    "homo_sapiens - paired_end - multiple - tab": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T18:01:20.291692062"
+    },
+    "homo_sapiens - single_end - bam": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:53:26.265642675"
+    },
+    "homo_sapiens - paired_end - arriba - wig": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:25:07.444214"
+    },
+    "homo_sapiens - paired_end - log_progress": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:54:12.126063825"
+    },
+    "homo_sapiens - paired_end - arriba - log_final": {
+        "content": [
+            "test.Log.final.out"
+        ],
+        "timestamp": "2023-11-23T13:25:06.829799"
+    },
+    "homo_sapiens - paired_end - bam_unsorted": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:23:33.300509"
+    },
+    "homo_sapiens - paired_end - arriba - sam": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:25:07.300383"
+    },
+    "homo_sapiens - paired_end - multiple - bam": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T18:01:19.851247126"
+    },
+    "homo_sapiens - paired_end - multiple - fastq": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:29:01.462257"
+    },
+    "homo_sapiens - single_end - bam_sorted": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:53:26.335457371"
+    },
+    "homo_sapiens - paired_end - arriba - bam_sorted": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:25:06.94699"
+    },
+    "homo_sapiens - paired_end - starfusion - junction": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Chimeric.out.junction:md5,c10ef219f4a30e83711b995bc5e40dba"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:59:58.641115828"
+    },
+    "homo_sapiens - single_end - tab": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:53:26.580593434"
+    },
+    "homo_sapiens - paired_end - starfusion - versions": {
+        "content": [
+            [
+                "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+            ]
+        ],
+        "timestamp": "2023-12-04T17:59:58.907317103"
+    },
+    "homo_sapiens - paired_end - multiple - bam_unsorted": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:29:01.330463"
+    },
+    "homo_sapiens - paired_end - arriba - log_progress": {
+        "content": [
+            "test.Log.progress.out"
+        ],
+        "timestamp": "2023-11-23T13:25:06.86866"
+    },
+    "homo_sapiens - paired_end - bedgraph": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a",
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6"
+                    ]
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:54:12.064121304"
+    },
+    "homo_sapiens - paired_end - starfusion - bam_unsorted": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:27:56.118974"
+    },
+    "homo_sapiens - paired_end - starfusion - read_per_gene_tab": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:27:56.264699"
+    },
+    "homo_sapiens - paired_end - multiple - log_progress": {
+        "content": [
+            "test.Log.progress.out"
+        ],
+        "timestamp": "2023-11-23T13:29:01.076947"
+    },
+    "homo_sapiens - paired_end - arriba - bam_unsorted": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:25:07.050409"
+    },
+    "homo_sapiens - paired_end - bam_sorted": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:54:12.002180537"
+    },
+    "homo_sapiens - single_end - spl_junc_tab": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:53:26.50932751"
+    },
+    "homo_sapiens - paired_end - starfusion - spl_junc_tab": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:59:58.731699486"
+    },
+    "homo_sapiens - single_end - log_out": {
+        "content": [
+            "test.Log.out"
+        ],
+        "timestamp": "2023-11-23T13:22:55.126286"
+    },
+    "homo_sapiens - paired_end - log_final": {
+        "content": [
+            "test.Log.final.out"
+        ],
+        "timestamp": "2023-11-23T13:23:33.253884"
+    },
+    "homo_sapiens - single_end - log_final": {
+        "content": [
+            "test.Log.final.out"
+        ],
+        "timestamp": "2023-11-23T13:22:55.11799"
+    },
+    "homo_sapiens - paired_end - bam_transcript": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:23:33.287684"
+    },
+    "homo_sapiens - paired_end - starfusion - log_progress": {
+        "content": [
+            "test.Log.progress.out"
+        ],
+        "timestamp": "2023-11-23T13:27:55.971484"
+    },
+    "homo_sapiens - paired_end - multiple - bam_transcript": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:29:01.264176"
+    },
+    "homo_sapiens - paired_end - multiple - read_per_gene_tab": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:29:01.596406"
+    },
+    "homo_sapiens - single_end - read_per_gene_tab": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:22:55.205936"
+    },
+    "homo_sapiens - paired_end - junction": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:23:33.340653"
+    },
+    "homo_sapiens - paired_end - spl_junc_tab": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:54:12.185730856"
+    },
+    "homo_sapiens - paired_end - starfusion - sam": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:27:56.300637"
+    },
+    "homo_sapiens - paired_end - arriba - bam": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Aligned.out.bam:md5,c1b1747f5873f2d17762725636e891d5"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:56:12.190560178"
+    },
+    "homo_sapiens - single_end - log_progress": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T17:53:26.450352138"
+    },
+    "homo_sapiens - paired_end - starfusion - wig": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:27:56.422018"
+    },
+    "homo_sapiens - paired_end - wig": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-23T13:23:33.429457"
+    },
+    "homo_sapiens - paired_end - starfusion - log_out": {
+        "content": [
+            "test.Log.out"
+        ],
+        "timestamp": "2023-11-23T13:27:55.93945"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/align/tests/nextflow.arriba.config b/modules/nf-core/star/align/tests/nextflow.arriba.config
new file mode 100644
index 0000000..2324b9e
--- /dev/null
+++ b/modules/nf-core/star/align/tests/nextflow.arriba.config
@@ -0,0 +1,14 @@
+process {
+    // Settings for the "arriba" STAR_ALIGN test scenario (per this file's name).
+    withName: STAR_GENOMEGENERATE {
+        ext.args = '--genomeSAindexNbases 9'
+    }
+    // Requests an unsorted BAM at compression level 0 with chimeric output type 'WithinBAM HardClip'.
+    withName: STAR_ALIGN {
+        ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50'
+    }
+
+}
+
+// Fix chown issue for the output star folder by running the container as the invoking user (-u uid:gid)
+docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)'
diff --git a/modules/nf-core/star/align/tests/nextflow.config b/modules/nf-core/star/align/tests/nextflow.config
new file mode 100644
index 0000000..c4ac580
--- /dev/null
+++ b/modules/nf-core/star/align/tests/nextflow.config
@@ -0,0 +1,14 @@
+process {
+    // Default settings for the STAR_ALIGN tests; the arriba/starfusion variants live in sibling config files.
+    withName: STAR_GENOMEGENERATE {
+        ext.args = '--genomeSAindexNbases 9'
+    }
+    // Requests a coordinate-sorted BAM plus unstranded bedGraph signal output.
+    withName: STAR_ALIGN {
+        ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --outWigType bedGraph --outWigStrand Unstranded'
+    }
+
+}
+
+// Fix chown issue for the output star folder by running the container as the invoking user (-u uid:gid)
+docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)'
diff --git a/modules/nf-core/star/align/tests/nextflow.starfusion.config b/modules/nf-core/star/align/tests/nextflow.starfusion.config
new file mode 100644
index 0000000..467b649
--- /dev/null
+++ b/modules/nf-core/star/align/tests/nextflow.starfusion.config
@@ -0,0 +1,14 @@
+process {
+    // Settings for the "starfusion" STAR_ALIGN test scenario (per this file's name).
+    withName: STAR_GENOMEGENERATE {
+        ext.args = '--genomeSAindexNbases 9'
+    }
+    // Requests an unsorted BAM, '--twopassMode Basic', and chimeric-junction settings (the --chim* flags).
+    withName: STAR_ALIGN {
+        ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outReadsUnmapped None --twopassMode Basic --outSAMstrandField intronMotif --outSAMunmapped Within --chimSegmentMin 12 --chimJunctionOverhangMin 8 --chimOutJunctionFormat 1 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --alignSJstitchMismatchNmax 5 -1 5 5 --chimMultimapScoreRange 3 --chimScoreJunctionNonGTAG -4 --chimMultimapNmax 20 --chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --alignInsertionFlush Right --alignSplicedMateMapLminOverLmate 0 --alignSplicedMateMapLmin 30'
+    }
+
+}
+
+// Fix chown issue for the output star folder by running the container as the invoking user (-u uid:gid)
+docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)'
diff --git a/modules/nf-core/star/align/tests/tags.yml b/modules/nf-core/star/align/tests/tags.yml
new file mode 100644
index 0000000..8beace1
--- /dev/null
+++ b/modules/nf-core/star/align/tests/tags.yml
@@ -0,0 +1,2 @@
+star/align:
+  - modules/nf-core/star/align/**
diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml
index 0b35ff5..350a459 100644
--- a/modules/nf-core/star/genomegenerate/environment.yml
+++ b/modules/nf-core/star/genomegenerate/environment.yml
@@ -5,5 +5,5 @@ channels:
   - defaults
 dependencies:
   - bioconda::star=2.7.10a
-  - bioconda::samtools=1.16.1
+  - bioconda::samtools=1.18
   - conda-forge::gawk=5.1.0
diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf
index 473e62a..2bc3e29 100644
--- a/modules/nf-core/star/genomegenerate/main.nf
+++ b/modules/nf-core/star/genomegenerate/main.nf
@@ -4,8 +4,8 @@ process STAR_GENOMEGENERATE {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' :
-        'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }"
+        'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' :
+        'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }"
 
     input:
     tuple val(meta), path(fasta)
diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test
new file mode 100644
index 0000000..eed8292
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test
@@ -0,0 +1,38 @@
+nextflow_process {
+    // nf-test spec for STAR_GENOMEGENERATE: a single case fed the homo_sapiens genome FASTA and GTF.
+    name "Test Process STAR_GENOMEGENERATE"
+    script "../main.nf"
+    process "STAR_GENOMEGENERATE"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "star"
+    tag "star/genomegenerate"
+
+    test("homo_sapiens") {
+        // Inputs: input[0] = [meta, [fasta]] and input[1] = [meta, [gtf]], both resolved from params.test_data.
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test_fasta' ],
+                    [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+                ])
+                input[1] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+                ])
+                """
+            }
+        }
+        // Only the file name of the index output path is snapshotted (not its contents), plus the versions channel.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.index[0][1]).name).match("index") },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
new file mode 100644
index 0000000..e7bb6ee
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
@@ -0,0 +1,16 @@
+{
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+            ]
+        ],
+        "timestamp": "2023-12-04T18:01:27.298248806"
+    },
+    "index": {
+        "content": [
+            "star"
+        ],
+        "timestamp": "2023-11-23T11:31:47.560528"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml
new file mode 100644
index 0000000..79f619b
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/tests/tags.yml
@@ -0,0 +1,2 @@
+star/genomegenerate:
+  - modules/nf-core/star/genomegenerate/**
diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf
index a01ef73..4bd79e7 100644
--- a/modules/nf-core/umitools/extract/main.nf
+++ b/modules/nf-core/umitools/extract/main.nf
@@ -33,7 +33,7 @@ process UMITOOLS_EXTRACT {
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
-            umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+            umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' )
         END_VERSIONS
         """
     }  else {
@@ -49,7 +49,7 @@ process UMITOOLS_EXTRACT {
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
-            umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+            umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' )
         END_VERSIONS
         """
     }
diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap
index 54e77fb..6d5944f 100644
--- a/modules/nf-core/umitools/extract/tests/main.nf.test.snap
+++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap
@@ -2,9 +2,9 @@
     "versions": {
         "content": [
             [
-                "versions.yml:md5,866a2da05ce1af35cc07261ffe6bc31a"
+                "versions.yml:md5,5a18da2d3a5a4de15e7aaae9082d7abb"
             ]
         ],
-        "timestamp": "2023-10-17T08:25:55.427194"
+        "timestamp": "2023-12-08T09:41:43.540658352"
     }
 }
\ No newline at end of file
diff --git a/modules/pfr/fastavalidate/main.nf b/modules/pfr/fastavalidate/main.nf
deleted file mode 100644
index 873983b..0000000
--- a/modules/pfr/fastavalidate/main.nf
+++ /dev/null
@@ -1,56 +0,0 @@
-process FASTAVALIDATE {
-    tag "$meta.id"
-    label 'process_single'
-
-    // conda "YOUR-TOOL-HERE"
-    // container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-    //     'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
-    //     'biocontainers/YOUR-TOOL-HERE' }"
-    container 'docker://gallvp/fasta_validator:a6a2ec1_ps'
-
-    input:
-    tuple val(meta), path(fasta)
-    
-    output:
-    tuple val(meta), path('*.validated.fasta')  , emit: valid_fasta , optional: true
-    tuple val(meta), path('*.error.log')        , emit: error_log   , optional: true
-    path "versions.yml"                         , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    fasta_validate \\
-        -v $fasta \\
-        2> "${prefix}.error.log" \\
-        || echo "Errors from fasta_validate printed to ${prefix}.error.log"
-    
-    if [ \$(cat "${prefix}.error.log" | wc -l) -gt 0 ]; then
-        echo "Validation failed..."
-        cat "${prefix}.error.log"
-    else
-        rm "${prefix}.error.log"
-        
-        cat $fasta \\
-            > "${prefix}.validated.fasta"
-    fi
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        fasta_validate: \$(md5sum \$(which fasta_validate) | cut -d' ' -f1)
-    END_VERSIONS
-    """
-    
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    touch "${prefix}.validated.fasta"
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        fasta_validate: \$(md5sum \$(which fasta_validate) | cut -d' ' -f1)
-    END_VERSIONS
-    """
-}
diff --git a/modules/pfr/liftoff/environment.yml b/modules/pfr/liftoff/environment.yml
new file mode 100644
index 0000000..8761c9b
--- /dev/null
+++ b/modules/pfr/liftoff/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "liftoff"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::liftoff=1.6.3"
diff --git a/modules/pfr/liftoff/main.nf b/modules/pfr/liftoff/main.nf
index 5356728..a382dab 100644
--- a/modules/pfr/liftoff/main.nf
+++ b/modules/pfr/liftoff/main.nf
@@ -2,7 +2,7 @@ process LIFTOFF {
     tag "$meta.id"
     label 'process_high'
 
-    conda "bioconda::liftoff=1.6.3"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/liftoff:1.6.3--pyhdfd78af_0':
         'biocontainers/liftoff:1.6.3--pyhdfd78af_0' }"
@@ -11,9 +11,9 @@ process LIFTOFF {
     tuple val(meta), path(target_fa)
     path ref_fa, name: 'liftoff_reference_assembly.fa' // To avoid name collisions betwen target_fa and ref_fa
     path ref_annotation
-    
+
     output:
-    tuple val(meta), path("*.gff3")             , emit: gff3
+    tuple val(meta), path("${prefix}.gff3")     , emit: gff3    // To avoid pattern collision with '*.polished.gff3'
     tuple val(meta), path("*.polished.gff3")    , emit: polished_gff3, optional: true
     tuple val(meta), path("*.unmapped.txt")     , emit: unmapped_txt
     path "versions.yml"                         , emit: versions
@@ -22,8 +22,8 @@ process LIFTOFF {
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    def args    = task.ext.args ?: ''
+    prefix      = task.ext.prefix ?: "${meta.id}"
     """
     liftoff \\
         -g $ref_annotation \\
@@ -42,9 +42,9 @@ process LIFTOFF {
         liftoff: \$(liftoff --version 2> /dev/null)
     END_VERSIONS
     """
-    
+
     stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    prefix = task.ext.prefix ?: "${meta.id}"
     """
     touch "${prefix}.gff3"
     touch "${prefix}.unmapped.txt"
diff --git a/modules/pfr/liftoff/meta.yml b/modules/pfr/liftoff/meta.yml
index e859282..ad1c5b8 100644
--- a/modules/pfr/liftoff/meta.yml
+++ b/modules/pfr/liftoff/meta.yml
@@ -1,3 +1,5 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "liftoff"
 description: "Liftoff is a tool that accurately maps annotations in GFF or GTF between assemblies of the same, or closely-related species."
 keywords:
@@ -46,7 +48,6 @@ output:
       type: file
       description: Polished lifted annotations for the target assembly in gff3 format
       pattern: "*.polished.gff3"
-      optional: true
   - unmapped_txt:
       type: file
       description: List of unmapped reference annotations
@@ -56,4 +57,6 @@ output:
       description: File containing software versions
       pattern: "versions.yml"
 authors:
-  - "@gallvp"
+  - "@GallVp"
+maintainers:
+  - "@GallVp"
diff --git a/modules/pfr/liftoff/tests/main.nf.test b/modules/pfr/liftoff/tests/main.nf.test
new file mode 100644
index 0000000..00d1d2a
--- /dev/null
+++ b/modules/pfr/liftoff/tests/main.nf.test
@@ -0,0 +1,89 @@
+nextflow_process {
+    // nf-test spec for LIFTOFF: one real run and one stub run; ./nextflow.config is loaded for the process settings.
+    name "Test Process LIFTOFF"
+    script "../main.nf"
+    process "LIFTOFF"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "liftoff"
+
+    test("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf") {
+        // The nf-core GUNZIP module runs first on genome_1_fasta; its `gunzip` output feeds input[1] below.
+        setup {
+            run("GUNZIP") {
+                script "../../../nf-core/gunzip"
+
+                process {
+                    """
+                    input[0] = [
+                        [ id:'test' ],
+                        file(params.test_data['homo_sapiens']['genome']['genome_1_fasta'], checkIfExists: true)
+                    ]
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
+                ]
+                input[1] = GUNZIP.out.gunzip.map { meta, file -> file }
+                input[2] = [
+                    file(params.test_data['homo_sapiens']['genome']['genome_1_gtf'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+        // unmapped_txt and versions are snapshotted; both gff3 outputs are only checked for one known exon line.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.unmapped_txt).match("unmapped_txt") },
+                { assert snapshot(process.out.versions).match("versions") },
+                { assert path(process.out.gff3.get(0).get(1)).getText().contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") },
+                { assert path(process.out.polished_gff3.get(0).get(1)).getText().contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") }
+            )
+        }
+
+    }
+
+    test("stub") {
+        options "-stub"
+        // Stub run: genome_1_fasta is passed directly as input[1], without the GUNZIP setup step.
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
+                ]
+                input[1] = [
+                    file(params.test_data['homo_sapiens']['genome']['genome_1_fasta'], checkIfExists: true)
+                ]
+                input[2] = [
+                    file(params.test_data['homo_sapiens']['genome']['genome_1_gtf'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+        // polished_gff3 must be an empty list here; the other channels only need to be non-null.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.gff3 != null },
+                { assert process.out.polished_gff3 == [] },
+                { assert process.out.unmapped_txt != null },
+                { assert process.out.versions != null },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/pfr/liftoff/tests/main.nf.test.snap b/modules/pfr/liftoff/tests/main.nf.test.snap
new file mode 100644
index 0000000..36c39b6
--- /dev/null
+++ b/modules/pfr/liftoff/tests/main.nf.test.snap
@@ -0,0 +1,23 @@
+{
+    "unmapped_txt": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.unmapped.txt:md5,7391d10df6e15db356b084c9af5259e4"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-01T13:57:40.748507"
+    },
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,205d9c609e7fe27d8199550d842bdce8"
+            ]
+        ],
+        "timestamp": "2023-12-01T13:57:40.752414"
+    }
+}
\ No newline at end of file
diff --git a/modules/pfr/liftoff/tests/nextflow.config b/modules/pfr/liftoff/tests/nextflow.config
new file mode 100644
index 0000000..06b9d76
--- /dev/null
+++ b/modules/pfr/liftoff/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: LIFTOFF {
+        ext.args = '-exclude_partial -copies -polish -a 0.1 -s 0.1'
+    }
+}
diff --git a/modules/pfr/liftoff/tests/tags.yml b/modules/pfr/liftoff/tests/tags.yml
new file mode 100644
index 0000000..4ae1fb0
--- /dev/null
+++ b/modules/pfr/liftoff/tests/tags.yml
@@ -0,0 +1,2 @@
+liftoff:
+  - "modules/pfr/liftoff/**"
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
new file mode 100644
index 0000000..cdd7398
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
@@ -0,0 +1,60 @@
+nextflow_workflow {
+
+    name "Test Workflow FASTQ_FASTQC_UMITOOLS_FASTP"
+    script "../main.nf"
+    workflow "FASTQ_FASTQC_UMITOOLS_FASTP"
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/fastq_fastqc_umitools_fastp"
+    tag "fastq_fastqc_umitools_fastp"
+    tag "fastqc"
+    tag "umitools/extract"
+    tag "fastp"
+
+
+    test("sarscov2 paired-end [fastq]") {
+
+        when {
+            workflow {
+                """
+                input[0] = [
+                            [ id:'test', single_end:false ], // meta map
+                            [
+                             file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                             file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+                            ]
+                ]
+                input[1] = false // skip_fastqc
+                input[2] = false // with_umi
+                input[3] = false // skip_umi_extract
+                input[4] = 1     // umi_discard_read
+                input[5] = false // skip_trimming
+                input[6] = []    // adapter_fasta
+                input[7] = false // save_trimmed_fail
+                input[8] = false // save_merged
+                input[9] = 1     // min_trimmed_reads
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(workflow.out.reads).match("reads") },
+                { assert snapshot(workflow.out.umi_log).match("umi_log") },
+                { assert snapshot(workflow.out.trim_json).match("trim_json") },
+                { assert snapshot(workflow.out.trim_reads_fail).match("trim_reads_fail") },
+                { assert snapshot(workflow.out.trim_reads_merged).match("trim_reads_merged") },
+                { assert snapshot(workflow.out.trim_read_count).match("trim_read_count") },
+                { assert snapshot(workflow.out.versions).match("versions") },
+
+                { assert workflow.out.fastqc_raw_html },
+                { assert workflow.out.fastqc_raw_zip },
+                { assert workflow.out.trim_html },
+                { assert workflow.out.trim_log },
+                { assert workflow.out.fastqc_trim_html },
+                { assert workflow.out.fastqc_trim_zip }
+            )
+        }
+    }
+}
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
new file mode 100644
index 0000000..38a65ae
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
@@ -0,0 +1,81 @@
+{
+    "trim_reads_merged": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-26T02:28:26.26920982"
+    },
+    "trim_reads_fail": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-26T02:28:26.25861515"
+    },
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,85bd0117e5778fff18e3920972a296ad",
+                "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0",
+                "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
+            ]
+        ],
+        "timestamp": "2023-11-26T02:28:26.30891403"
+    },
+    "trim_json": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-26T02:28:26.24768259"
+    },
+    "reads": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7",
+                        "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39"
+                    ]
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T11:30:32.061644815"
+    },
+    "umi_log": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-26T02:28:26.238536"
+    },
+    "trim_read_count": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    198
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-26T02:28:26.27984169"
+    }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml
new file mode 100644
index 0000000..84a4b56
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/fastq_fastqc_umitools_fastp:
+  - subworkflows/nf-core/fastq_fastqc_umitools_fastp/**

From 8a6c5fe9af6d5721dada1ac3efc5a7385c650df7 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 13 Dec 2023 12:25:18 +1300
Subject: [PATCH 25/59] Added EDTA from pfr/nxf-modules

---
 modules.json                                  |  15 ++
 modules/local/edta/edta/main.nf               |  72 --------
 modules/local/edta/restore_edta_ids/main.nf   |  63 -------
 modules/local/edta/shorten_edta_ids/main.nf   |  30 ----
 .../pfr/custom/restoregffids/environment.yml  |   9 +
 modules/pfr/custom/restoregffids/main.nf      |  35 ++++
 modules/pfr/custom/restoregffids/meta.yml     |  58 ++++++
 .../templates/restore_gff_ids.py}             |  33 ++--
 .../custom/restoregffids/tests/main.nf.test   |  63 +++++++
 .../restoregffids/tests/main.nf.test.snap     |  41 +++++
 .../pfr/custom/restoregffids/tests/tags.yml   |   2 +
 .../custom/shortenfastaids/environment.yml    |  11 ++
 modules/pfr/custom/shortenfastaids/main.nf    |  34 ++++
 modules/pfr/custom/shortenfastaids/meta.yml   |  58 ++++++
 .../templates}/shorten_fasta_ids.py           |  91 +++++-----
 .../custom/shortenfastaids/tests/main.nf.test | 131 ++++++++++++++
 .../shortenfastaids/tests/main.nf.test.snap   | 170 ++++++++++++++++++
 .../pfr/custom/shortenfastaids/tests/tags.yml |   2 +
 modules/pfr/edta/edta/environment.yml         |   9 +
 modules/pfr/edta/edta/main.nf                 |  93 ++++++++++
 modules/pfr/edta/edta/meta.yml                |  61 +++++++
 modules/pfr/edta/edta/tests/main.nf.test      |  72 ++++++++
 modules/pfr/edta/edta/tests/nextflow.config   |   3 +
 modules/pfr/edta/edta/tests/tags.yml          |   2 +
 24 files changed, 936 insertions(+), 222 deletions(-)
 delete mode 100644 modules/local/edta/edta/main.nf
 delete mode 100644 modules/local/edta/restore_edta_ids/main.nf
 delete mode 100644 modules/local/edta/shorten_edta_ids/main.nf
 create mode 100644 modules/pfr/custom/restoregffids/environment.yml
 create mode 100644 modules/pfr/custom/restoregffids/main.nf
 create mode 100644 modules/pfr/custom/restoregffids/meta.yml
 rename modules/{local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming.py => pfr/custom/restoregffids/templates/restore_gff_ids.py} (50%)
 create mode 100644 modules/pfr/custom/restoregffids/tests/main.nf.test
 create mode 100644 modules/pfr/custom/restoregffids/tests/main.nf.test.snap
 create mode 100644 modules/pfr/custom/restoregffids/tests/tags.yml
 create mode 100644 modules/pfr/custom/shortenfastaids/environment.yml
 create mode 100644 modules/pfr/custom/shortenfastaids/main.nf
 create mode 100644 modules/pfr/custom/shortenfastaids/meta.yml
 rename modules/{local/edta/shorten_edta_ids/resources/usr/bin => pfr/custom/shortenfastaids/templates}/shorten_fasta_ids.py (55%)
 create mode 100644 modules/pfr/custom/shortenfastaids/tests/main.nf.test
 create mode 100644 modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap
 create mode 100644 modules/pfr/custom/shortenfastaids/tests/tags.yml
 create mode 100644 modules/pfr/edta/edta/environment.yml
 create mode 100644 modules/pfr/edta/edta/main.nf
 create mode 100644 modules/pfr/edta/edta/meta.yml
 create mode 100644 modules/pfr/edta/edta/tests/main.nf.test
 create mode 100644 modules/pfr/edta/edta/tests/nextflow.config
 create mode 100644 modules/pfr/edta/edta/tests/tags.yml

diff --git a/modules.json b/modules.json
index cde5635..fa42bdf 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,21 @@
     "git@github.com:PlantandFoodResearch/nxf-modules.git": {
       "modules": {
         "pfr": {
+          "custom/restoregffids": {
+            "branch": "main",
+            "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
+            "installed_by": ["modules"]
+          },
+          "custom/shortenfastaids": {
+            "branch": "main",
+            "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
+            "installed_by": ["modules"]
+          },
+          "edta/edta": {
+            "branch": "main",
+            "git_sha": "d14b1f1d790cc01c11f8ec5aa80a9562c6808b20",
+            "installed_by": ["modules"]
+          },
           "liftoff": {
             "branch": "main",
             "git_sha": "3593ec100c92b656204bf739a51d62fd44d81f6f",
diff --git a/modules/local/edta/edta/main.nf b/modules/local/edta/edta/main.nf
deleted file mode 100644
index 9c9b180..0000000
--- a/modules/local/edta/edta/main.nf
+++ /dev/null
@@ -1,72 +0,0 @@
-process EDTA {
-    tag "$meta.id"
-    label "process_high"
-    label "process_week_long"
-    
-    container 'https://depot.galaxyproject.org/singularity/edta:2.1.0--hdfd78af_1'
-
-    input:
-    tuple val(meta), path(fasta_file)
-    
-    output:
-    tuple val(meta), path('*.EDTA.TElib.fa')    , emit: te_lib_fasta
-    tuple val(meta), path('*.EDTA.intact.gff3') , emit: intact_gff3
-    tuple val(meta), path('*.EDTA.pass.list')   , emit: pass_list
-    tuple val(meta), path('*.EDTA.out')         , emit: out_file
-    tuple val(meta), path('*.EDTA.TEanno.gff3') , emit: te_anno_gff3
-    path "versions.yml"                         , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-    
-    script:
-    def args = task.ext.args ?: ''
-    def modFileName = "${fasta_file}.mod"
-    """
-    EDTA.pl \\
-    --genome $fasta_file \\
-    --threads $task.cpus \\
-    $args
-    
-    if [ -f "${modFileName}.EDTA.raw/LTR/${modFileName}.pass.list" ]; then
-        cat "${modFileName}.EDTA.raw/LTR/${modFileName}.pass.list" \\
-        > "${modFileName}.EDTA.pass.list"
-    else
-        echo "EDTA PASS LIST IS EMPTY" \\
-        > "${modFileName}.EDTA.pass.list"
-    fi
-
-    if [ -f "${modFileName}.EDTA.anno/${modFileName}.out" ]; then
-        cat "${modFileName}.EDTA.anno/${modFileName}.out" \\
-        > "${modFileName}.EDTA.out"
-    else
-        echo "EDTA DID NOT PRODUCE AN OUT FILE" \\
-        > "${modFileName}.EDTA.out"
-    fi
-
-    if [ ! -f "${modFileName}.EDTA.TEanno.gff3" ]; then
-        echo "##EDTA DID NOT PRODUCE A TEANNO GFF3" \\
-        > "${modFileName}.EDTA.TEanno.gff3"
-    fi
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
-    END_VERSIONS
-    """
-    
-    stub:
-    def modFileName = "${fasta_file}.mod"
-    """
-    touch "${modFileName}.EDTA.TElib.fa"
-    touch "${modFileName}.EDTA.intact.gff3"
-    touch "${modFileName}.EDTA.pass.list"
-    touch "${modFileName}.EDTA.out"
-    touch "${modFileName}.EDTA.TEanno.gff3"
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
-    END_VERSIONS
-    """
-}
\ No newline at end of file
diff --git a/modules/local/edta/restore_edta_ids/main.nf b/modules/local/edta/restore_edta_ids/main.nf
deleted file mode 100644
index 4da8a34..0000000
--- a/modules/local/edta/restore_edta_ids/main.nf
+++ /dev/null
@@ -1,63 +0,0 @@
-process RESTORE_EDTA_IDS {
-    tag "$meta.id"
-    label "process_single"
-
-    container "docker://gallvp/python3npkgs:v0.4"
-
-    input:
-    tuple val(meta), path(te_lib_fa)
-    path(intact_gff3)
-    path(pass_list)
-    path(out_file)
-    path(te_anno_gff3)
-    path(renamed_ids_tsv)
-    
-    output:
-    tuple val(meta), path("${meta.id}.EDTA.TElib.fa")               , emit: te_lib_fasta
-    tuple val(meta), path("${meta.id}.EDTA.intact.gff3")            , emit: intact_gff3
-    tuple val(meta), path("${meta.id}.renamed.ids.EDTA.pass.list")  , emit: pass_list
-    tuple val(meta), path("${meta.id}.renamed.ids.EDTA.out")        , emit: out_file
-    tuple val(meta), path("${meta.id}.EDTA.TEanno.gff3")            , emit: te_anno_gff3
-    tuple val(meta), path("${meta.id}.renamed.ids.tsv")             , emit: renamed_ids_tsv
-    path "versions.yml"                                             , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    """
-    cat $pass_list > "${meta.id}.renamed.ids.EDTA.pass.list"
-    cat $out_file > "${meta.id}.renamed.ids.EDTA.out"
-    cat $te_lib_fa > "${meta.id}.EDTA.TElib.fa"
-    cat $renamed_ids_tsv > "${meta.id}.renamed.ids.tsv"
-    
-    renamed_ids_head=\$(head -n 1 "$renamed_ids_tsv")
-    
-    if [[ \$renamed_ids_head == "IDs have acceptable length and character. No change required." ]]; then
-        cat $te_anno_gff3 > "${meta.id}.EDTA.TEanno.gff3"
-        cat $intact_gff3 > "${meta.id}.EDTA.intact.gff3"
-    else
-        reverse_edta_naming.py "$renamed_ids_tsv" "$te_anno_gff3" "$intact_gff3" "$meta"
-    fi
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        reverse_edta_naming: \$(md5sum \$(which reverse_edta_naming.py) | cut -d' ' -f1)
-    END_VERSIONS
-    """
-    
-    stub:
-    """
-    touch "${meta.id}.EDTA.TElib.fa"
-    touch "${meta.id}.EDTA.intact.gff3"
-    touch "${meta.id}.renamed.ids.EDTA.pass.list"
-    touch "${meta.id}.renamed.ids.EDTA.out"
-    touch "${meta.id}.EDTA.TEanno.gff3"
-    touch "${meta.id}.renamed.ids.tsv"
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        reverse_edta_naming: \$(md5sum \$(which reverse_edta_naming.py) | cut -d' ' -f1)
-    END_VERSIONS
-    """
-}
\ No newline at end of file
diff --git a/modules/local/edta/shorten_edta_ids/main.nf b/modules/local/edta/shorten_edta_ids/main.nf
deleted file mode 100644
index 43b94f0..0000000
--- a/modules/local/edta/shorten_edta_ids/main.nf
+++ /dev/null
@@ -1,30 +0,0 @@
-process SHORTEN_EDTA_IDS {
-    tag "$meta.id"
-    label "process_single"
-
-    container "docker://gallvp/python3npkgs:v0.4"
-    
-    input:
-    tuple val(meta), path(fasta_file)
-    
-    output:
-    tuple val(meta), path("*.renamed.ids.fa")   , emit: renamed_ids_fasta
-    tuple val(meta), path("*.renamed.ids.tsv")  , emit: renamed_ids_tsv
-    path "versions.yml"                         , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-    
-    script:
-    """
-    FILE="$fasta_file"
-    output_prefix="\${FILE%.*}"
-
-    shorten_fasta_ids.py "$fasta_file" "\$output_prefix"
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        shorten_fasta_ids: \$(md5sum \$(which shorten_fasta_ids.py) | cut -d' ' -f1)
-    END_VERSIONS
-    """
-}
\ No newline at end of file
diff --git a/modules/pfr/custom/restoregffids/environment.yml b/modules/pfr/custom/restoregffids/environment.yml
new file mode 100644
index 0000000..2450c45
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "custom_restoregffids"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "python=3.10.2"
diff --git a/modules/pfr/custom/restoregffids/main.nf b/modules/pfr/custom/restoregffids/main.nf
new file mode 100644
index 0000000..14e2c07
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/main.nf
@@ -0,0 +1,35 @@
+process CUSTOM_RESTOREGFFIDS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.10.2':
+        'biocontainers/python:3.10.2' }"
+
+    input:
+    tuple val(meta), path(gff3)
+    path(ids_tsv)
+
+    output:
+    tuple val(meta), path("*.restored.ids.gff3")    , emit: restored_ids_gff3
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    template 'restore_gff_ids.py'
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch "${prefix}.restored.ids.gff3"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | cut -d' ' -f2)
+    END_VERSIONS
+    """
+}
diff --git a/modules/pfr/custom/restoregffids/meta.yml b/modules/pfr/custom/restoregffids/meta.yml
new file mode 100644
index 0000000..4e42b82
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/meta.yml
@@ -0,0 +1,58 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "custom_restoregffids"
+description: |
+  Restores IDs in a gff3 file based on a TSV table
+  consisting of original (first column) and new IDs (second column).
+  This module is helpful when some tools like EDTA implicitly shorten
+  the IDs without producing the ID map, leading to downstream mismatch
+  in IDs across files.
+keywords:
+  - genome
+  - gff
+  - ID
+  - shorten
+  - restore
+tools:
+  - "python":
+      description: |
+        Python is a programming language that lets you work quickly
+        and integrate systems more effectively
+      homepage: "https://www.python.org"
+      documentation: "https://docs.python.org/3/"
+      tool_dev_url: "https://github.com/python/cpython"
+      licence: ["Python-2.0"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test' ]`
+  - gff3:
+      type: file
+      description: Input gff3 file
+      pattern: "*.{gff,gff3}"
+  - ids_tsv:
+      type: file
+      description: |
+        A TSV file with original (first column) and new ids (second column)
+        if id change was required
+      pattern: "*.tsv"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test' ]`
+  - restored_ids_gff3:
+      type: file
+      description: GFF3 file with restored ids
+      pattern: "*.restored.ids.gff3"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@GallVp"
+maintainers:
+  - "@GallVp"
diff --git a/modules/local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming.py b/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py
similarity index 50%
rename from modules/local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming.py
rename to modules/pfr/custom/restoregffids/templates/restore_gff_ids.py
index 7e8522c..d0699de 100755
--- a/modules/local/edta/restore_edta_ids/resources/usr/bin/reverse_edta_naming.py
+++ b/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py
@@ -1,19 +1,18 @@
 #!/usr/bin/env python3
 
-import sys
+from platform import python_version
 
-renamed_ids_tsv = sys.argv[1]
-te_anno_gff3 = sys.argv[2]
-intact_gff3 = sys.argv[3]
-output_prefix = sys.argv[4]
+ids_tsv = "$ids_tsv"
+input_gff3 = "$gff3"
+output_prefix = "$prefix"
 
 
-def create_name_mapping_from_file(file_path):
+def create_name_mapping_from_tsv(file_path):
     dictionary = {}
 
     with open(file_path, "r") as tsv_file:
         for line in tsv_file:
-            columns = line.strip().split("\t")
+            columns = line.strip().split("\\t")
             if len(columns) != 2:
                 raise ValueError(f"{file_path} should be a two column TSV file")
 
@@ -23,7 +22,12 @@ def create_name_mapping_from_file(file_path):
     return dictionary
 
 
-def reverse_rename_gff3_file(new_to_orig_ids, file_path, output_file_name):
+def restore_gff3_ids(new_to_orig_ids, file_path, output_file_name):
+    # Write versions
+    with open(f"versions.yml", "w") as f_versions:
+        f_versions.write('"${task.process}":\\n')
+        f_versions.write(f"    python: {python_version()}\\n")
+
     with open(file_path, "r") as input_gff3_file:
         input_lines = input_gff3_file.readlines()
 
@@ -33,16 +37,11 @@ def reverse_rename_gff3_file(new_to_orig_ids, file_path, output_file_name):
                 output_gff_file.write(line)
                 continue
 
-            new_id = line.split("\t")[0]
+            new_id = line.split("\\t")[0]
             orig_id = new_to_orig_ids[new_id]
-            output_gff_file.write(line.replace(new_id, orig_id))
+            output_gff_file.write("\\t".join([orig_id] + line.split("\\t")[1:]))
 
 
 if __name__ == "__main__":
-    new_to_orig_ids = create_name_mapping_from_file(renamed_ids_tsv)
-    reverse_rename_gff3_file(
-        new_to_orig_ids, te_anno_gff3, f"{output_prefix}.EDTA.TEanno.gff3"
-    )
-    reverse_rename_gff3_file(
-        new_to_orig_ids, intact_gff3, f"{output_prefix}.EDTA.intact.gff3"
-    )
+    new_to_orig_ids = create_name_mapping_from_tsv(ids_tsv)
+    restore_gff3_ids(new_to_orig_ids, input_gff3, f"{output_prefix}.restored.ids.gff3")
diff --git a/modules/pfr/custom/restoregffids/tests/main.nf.test b/modules/pfr/custom/restoregffids/tests/main.nf.test
new file mode 100644
index 0000000..521b924
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/tests/main.nf.test
@@ -0,0 +1,63 @@
+nextflow_process {
+
+    name "Test Process CUSTOM_RESTOREGFFIDS"
+    script "../main.nf"
+    process "CUSTOM_RESTOREGFFIDS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "custom"
+    tag "custom/restoregffids"
+
+    test("sarscov2-genome_gff3-success") {
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+                ]
+                input[1] = Channel.of('Chr1\tMT192765.1').collectFile(name: 'id_map.tsv', newLine: true)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert path(process.out.restored_ids_gff3.get(0).get(1)).getText().contains("Chr1") },
+                { assert !path(process.out.restored_ids_gff3.get(0).get(1)).getText().contains("MT192765.1") },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("stub") {
+
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+                ]
+                input[1] = Channel.of('Chr1\tMT192765.1').collectFile(name: 'id_map.tsv', newLine: true)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.restored_ids_gff3 != null },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/pfr/custom/restoregffids/tests/main.nf.test.snap b/modules/pfr/custom/restoregffids/tests/main.nf.test.snap
new file mode 100644
index 0000000..ffe43e7
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/tests/main.nf.test.snap
@@ -0,0 +1,41 @@
+{
+    "sarscov2-genome_gff3-success": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.restored.ids.gff3:md5,2c294938b9eb4e52d19e14725c1d92a9"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e"
+                ],
+                "restored_ids_gff3": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.restored.ids.gff3:md5,2c294938b9eb4e52d19e14725c1d92a9"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e"
+                ]
+            }
+        ],
+        "timestamp": "2023-12-07T13:49:30.047425"
+    },
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e"
+            ]
+        ],
+        "timestamp": "2023-12-07T13:49:30.071175"
+    }
+}
\ No newline at end of file
diff --git a/modules/pfr/custom/restoregffids/tests/tags.yml b/modules/pfr/custom/restoregffids/tests/tags.yml
new file mode 100644
index 0000000..1d4b9a8
--- /dev/null
+++ b/modules/pfr/custom/restoregffids/tests/tags.yml
@@ -0,0 +1,2 @@
+custom/restoregffids:
+  - "modules/pfr/custom/restoregffids/**"
diff --git a/modules/pfr/custom/shortenfastaids/environment.yml b/modules/pfr/custom/shortenfastaids/environment.yml
new file mode 100644
index 0000000..e80fa7c
--- /dev/null
+++ b/modules/pfr/custom/shortenfastaids/environment.yml
@@ -0,0 +1,11 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "custom_shortenfastaids"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+
+dependencies:
+  - biopython==1.75
+  - python=3.8
diff --git a/modules/pfr/custom/shortenfastaids/main.nf b/modules/pfr/custom/shortenfastaids/main.nf
new file mode 100644
index 0000000..92762ef
--- /dev/null
+++ b/modules/pfr/custom/shortenfastaids/main.nf
@@ -0,0 +1,34 @@
+process CUSTOM_SHORTENFASTAIDS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/biopython:1.75':
+        'biocontainers/biopython:1.75' }"
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path("*.short.ids.fasta")  , emit: short_ids_fasta , optional: true
+    tuple val(meta), path("*.short.ids.tsv")    , emit: short_ids_tsv   , optional: true
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    template 'shorten_fasta_ids.py'
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | cut -d' ' -f2)
+        biopython: \$(pip list | grep "biopython" | cut -d' ' -f3)
+    END_VERSIONS
+    """
+}
diff --git a/modules/pfr/custom/shortenfastaids/meta.yml b/modules/pfr/custom/shortenfastaids/meta.yml
new file mode 100644
index 0000000..2425810
--- /dev/null
+++ b/modules/pfr/custom/shortenfastaids/meta.yml
@@ -0,0 +1,58 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "custom_shortenfastaids"
+description: |
+  Shortens fasta IDs and produces a new fasta along with a TSV table
+  consisting of original (first column) and new IDs (second column).
+  This module is helpful when some tools like EDTA implicitly shorten
+  the IDs without producing the ID map, leading to downstream mismatch
+  in IDs across files.
+keywords:
+  - genome
+  - fasta
+  - ID
+  - shorten
+tools:
+  - "biopython":
+      description: |
+        Biopython is a set of freely available tools for biological computation written in Python by
+        an international team of developers.
+      homepage: "https://biopython.org"
+      documentation: "https://biopython.org/wiki/Documentation"
+      tool_dev_url: "https://github.com/biopython/biopython"
+      doi: "10.1093/bioinformatics/btp163"
+      licence: ["BSD-3-Clause"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test' ]`
+  - fasta:
+      type: file
+      description: Input fasta file
+      pattern: "*.{fsa,fa,fasta}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test' ]`
+  - short_ids_fasta:
+      type: file
+      description: Fasta file with shortened ids if id change is required
+      pattern: "*.short.ids.fasta"
+  - short_ids_tsv:
+      type: file
+      description: |
+        A TSV file with original (first column) and new ids (second column)
+        if id change is required
+      pattern: "*.short.ids.tsv"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@GallVp"
+maintainers:
+  - "@GallVp"
diff --git a/modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids.py b/modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py
similarity index 55%
rename from modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids.py
rename to modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py
index 0b6e6d2..54f35bf 100755
--- a/modules/local/edta/shorten_edta_ids/resources/usr/bin/shorten_fasta_ids.py
+++ b/modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py
@@ -1,26 +1,22 @@
 #!/usr/bin/env python3
 
 import re
-import sys
 
 from Bio import SeqIO
+from importlib.metadata import version
+from platform import python_version
 
 # The input fasta file path
-fasta_file_path = sys.argv[1]
+fasta_file_path = "$fasta"
+output_files_prefix = "$prefix"
 
-# The prefix for output files: prefix.renamed.ids.fa, prefix.renamed.ids.tsv
-output_files_prefix = sys.argv[2]
 
-# In the case where IDs have acceptable character and no change is needed, the output is stdout:
-# "IDs have acceptable length and character. No change required."
-
-
-def extract_fasta_ids(fasta_file_path):
+def extract_fasta_ids_and_descriptions(fasta_file_path):
     fasta_file_obj = SeqIO.parse(fasta_file_path, "fasta")
 
     ids = []
     for record in fasta_file_obj:
-        ids.append(record.id)
+        ids.append((record.id, record.description))
     return ids
 
 
@@ -38,29 +34,39 @@ def write_fasta_with_new_ids(fasta_file_path, id_mapping, file_prefix):
 
         replaced_records.append(record)
 
-    SeqIO.write(replaced_records, f"{file_prefix}.renamed.ids.fa", "fasta")
-
-
-def write_fasta_without_comments(fasta_file_path, file_prefix):
-    old_fasta_file_obj = SeqIO.parse(fasta_file_path, "fasta")
+    SeqIO.write(replaced_records, f"{file_prefix}.short.ids.fasta", "fasta")
 
-    replaced_records = []
-    for record in old_fasta_file_obj:
-        record.description = ""
-        replaced_records.append(record)
 
-    SeqIO.write(replaced_records, f"{file_prefix}.renamed.ids.fa", "fasta")
+def do_id_need_to_change(id_and_description, silent=False):
+    id = id_and_description[0]
+    description = id_and_description[1]
+    if len(id) > 13:
+        if not silent:
+            print(f"{id} has length greater than 13")
+        return True
 
+    if not re.match(r"^[a-zA-Z0-9_]+\$", id):
+        if not silent:
+            print(f"{id} does not match '^[a-zA-Z0-9_]+\$'")
+        return True
 
-def do_id_need_to_change(id):
-    if len(id) > 13 or not re.match(r"^[a-zA-Z0-9_]+$", id):
+    if description != id and description != "":
+        if not silent:
+            print(f"{id} contains a comment: {description.replace(id, '')}")
         return True
 
+    if not silent:
+        print(f"{id} is acceptable")
     return False
 
 
-def do_ids_need_to_change(ids):
-    return any([do_id_need_to_change(id) for id in ids])
+def do_ids_need_to_change(ids_and_descriptions, silent=False):
+    return any(
+        [
+            do_id_need_to_change(id_and_description, silent)
+            for id_and_description in ids_and_descriptions
+        ]
+    )
 
 
 def extract_common_patterns(ids):
@@ -80,23 +86,25 @@ def extract_common_patterns(ids):
     return {pattern: pattern[:3] for pattern in common_patterns}
 
 
-def shorten_ids(ids, patterns_dict):
+def shorten_ids(input_ids_and_descriptions, patterns_dict):
     shortened_ids = []
 
-    for id in ids:
-        if not do_id_need_to_change(id):
+    for id_and_description in input_ids_and_descriptions:
+        id = id_and_description[0]
+        description = ""  # Treat description as absent as it will be removed by write_fasta_with_new_ids
+        if not do_id_need_to_change((id, description), silent=True):
             shortened_ids.append(id)
             continue
 
         shortened_id = shorten_id_by_pattern_replacement(patterns_dict, id)
 
-        if not do_id_need_to_change(shortened_id):
+        if not do_id_need_to_change((shortened_id, description), silent=True):
             shortened_ids.append(shortened_id)
             continue
 
         shortened_id = f"Ctg{generate_hash(id)}"
 
-        if not do_id_need_to_change(shortened_id):
+        if not do_id_need_to_change((shortened_id, description), silent=True):
             shortened_ids.append(shortened_id)
             continue
 
@@ -146,24 +154,27 @@ def fail_if_new_ids_not_valid(ids):
 
 
 if __name__ == "__main__":
-    input_ids = extract_fasta_ids(fasta_file_path)
+    input_ids_and_descriptions = extract_fasta_ids_and_descriptions(fasta_file_path)
+    input_ids = [x[0] for x in input_ids_and_descriptions]
 
-    if not do_ids_need_to_change(input_ids):
-        print("IDs have acceptable length and character. No change required.")
-
-        with open(f"{output_files_prefix}.renamed.ids.tsv", "w") as f:
-            f.write("IDs have acceptable length and character. No change required.")
-
-        write_fasta_without_comments(fasta_file_path, output_files_prefix)
+    # Write versions
+    with open(f"versions.yml", "w") as f_versions:
+        f_versions.write('"${task.process}":\\n')
+        f_versions.write(f"    python: {python_version()}\\n")
+        f_versions.write(f"    biopython: {version('biopython')}\\n")
 
+    if not do_ids_need_to_change(input_ids_and_descriptions):
+        print("IDs have acceptable length and character. No change required.")
         exit(0)
 
-    new_ids = shorten_ids(input_ids, extract_common_patterns(input_ids))
+    new_ids = shorten_ids(
+        input_ids_and_descriptions, extract_common_patterns(input_ids)
+    )
     fail_if_new_ids_not_valid(new_ids)
 
-    with open(f"{output_files_prefix}.renamed.ids.tsv", "w") as f:
+    with open(f"{output_files_prefix}.short.ids.tsv", "w") as f:
         for input_id, new_id in zip(input_ids, new_ids):
-            f.write(f"{input_id}\t{new_id}\n")
+            f.write(f"{input_id}\\t{new_id}\\n")
 
     write_fasta_with_new_ids(
         fasta_file_path, zip(input_ids, new_ids), output_files_prefix
diff --git a/modules/pfr/custom/shortenfastaids/tests/main.nf.test b/modules/pfr/custom/shortenfastaids/tests/main.nf.test
new file mode 100644
index 0000000..dc46bae
--- /dev/null
+++ b/modules/pfr/custom/shortenfastaids/tests/main.nf.test
@@ -0,0 +1,131 @@
+nextflow_process {
+
+    name "Test Process CUSTOM_SHORTENFASTAIDS"
+    script "../main.nf"
+    process "CUSTOM_SHORTENFASTAIDS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "custom"
+    tag "custom/shortenfastaids"
+
+    test("homo_sapiens-genome_fasta-no_change") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert snapshot(process.out.versions).match("versions") },
+                { assert process.out.short_ids_fasta == [] },
+                { assert process.out.short_ids_tsv == [] }
+            )
+        }
+
+    }
+
+    test("sarscov2-genome_fasta-pattern_change") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("homo_sapiens-genome2_fasta-length_change") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.test_data['homo_sapiens']['genome']['genome2_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("custom_fasta-comment_change") {
+
+        when {
+            process {
+                """
+                input[0] = Channel.of('>Chr1 This is a test comment', 'AGCTAGCT')
+                | collectFile(name: 'sample.fasta', newLine: true)
+                | map { file -> [ [ id:'test' ], file ] }
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match("versions") },
+                { assert process.out.short_ids_fasta == [] },
+                { assert process.out.short_ids_tsv == [] }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap b/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap
new file mode 100644
index 0000000..8fed1b9
--- /dev/null
+++ b/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap
@@ -0,0 +1,170 @@
+{
+    "custom_fasta-comment_change": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.short.ids.fasta:md5,c861b9d46a4d9bdba66953cff572fc5d"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.short.ids.tsv:md5,8762f2bffbdff75c2812bad72ba52bba"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+                ],
+                "short_ids_fasta": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.short.ids.fasta:md5,c861b9d46a4d9bdba66953cff572fc5d"
+                    ]
+                ],
+                "short_ids_tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.short.ids.tsv:md5,8762f2bffbdff75c2812bad72ba52bba"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+                ]
+            }
+        ],
+        "timestamp": "2023-12-07T13:33:05.523745"
+    },
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+            ]
+        ],
+        "timestamp": "2023-12-07T13:30:30.361527"
+    },
+    "homo_sapiens-genome_fasta-no_change": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+                ],
+                "short_ids_fasta": [
+                    
+                ],
+                "short_ids_tsv": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+                ]
+            }
+        ],
+        "timestamp": "2023-12-07T13:32:54.220188"
+    },
+    "homo_sapiens-genome2_fasta-length_change": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.short.ids.fasta:md5,1382acd98d4cd233a8062ef01b2aaa6d"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.short.ids.tsv:md5,99c0f2a529cb595b2d8530024ed2880e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+                ],
+                "short_ids_fasta": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.short.ids.fasta:md5,1382acd98d4cd233a8062ef01b2aaa6d"
+                    ]
+                ],
+                "short_ids_tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.short.ids.tsv:md5,99c0f2a529cb595b2d8530024ed2880e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+                ]
+            }
+        ],
+        "timestamp": "2023-12-07T13:33:01.924483"
+    },
+    "sarscov2-genome_fasta-pattern_change": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.short.ids.fasta:md5,14d6f587b6d28889c5c0f985e78d602f"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.short.ids.tsv:md5,d7a2af88e8549586e5616bff6a88bd71"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+                ],
+                "short_ids_fasta": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.short.ids.fasta:md5,14d6f587b6d28889c5c0f985e78d602f"
+                    ]
+                ],
+                "short_ids_tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.short.ids.tsv:md5,d7a2af88e8549586e5616bff6a88bd71"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+                ]
+            }
+        ],
+        "timestamp": "2023-12-07T13:32:58.12885"
+    }
+}
\ No newline at end of file
diff --git a/modules/pfr/custom/shortenfastaids/tests/tags.yml b/modules/pfr/custom/shortenfastaids/tests/tags.yml
new file mode 100644
index 0000000..4715b64
--- /dev/null
+++ b/modules/pfr/custom/shortenfastaids/tests/tags.yml
@@ -0,0 +1,2 @@
+custom/shortenfastaids:
+  - "modules/pfr/custom/shortenfastaids/**"
diff --git a/modules/pfr/edta/edta/environment.yml b/modules/pfr/edta/edta/environment.yml
new file mode 100644
index 0000000..63160e8
--- /dev/null
+++ b/modules/pfr/edta/edta/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "edta_edta"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::edta=2.1.0"
diff --git a/modules/pfr/edta/edta/main.nf b/modules/pfr/edta/edta/main.nf
new file mode 100644
index 0000000..458f525
--- /dev/null
+++ b/modules/pfr/edta/edta/main.nf
@@ -0,0 +1,93 @@
+process EDTA_EDTA {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/edta:2.1.0--hdfd78af_1':
+        'biocontainers/edta:2.1.0--hdfd78af_1' }"
+
+    input:
+    tuple val(meta), path(fasta)
+    path cds
+    path curatedlib
+    path rmout
+    path exclude
+
+    output:
+    tuple val(meta), path('*.log')              , emit: log
+    tuple val(meta), path('*.EDTA.TElib.fa')    , emit: te_lib_fasta
+    tuple val(meta), path('*.EDTA.pass.list')   , emit: pass_list           , optional: true
+    tuple val(meta), path('*.EDTA.out')         , emit: out_file            , optional: true
+    tuple val(meta), path('*.EDTA.TEanno.gff3') , emit: te_anno_gff3        , optional: true
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args            = task.ext.args         ?: ''
+    def prefix          = task.ext.prefix       ?: "${meta.id}"
+    def mod_file_name   = "${fasta}.mod"
+    def cds_file        = cds                   ? "--cds $cds"              : ''
+    def curatedlib_file = curatedlib            ? "--curatedlib $curatedlib": ''
+    def rmout_file      = rmout                 ? "--rmout $rmout"          : ''
+    def exclude_file    = exclude               ? "--exclude $exclude"      : ''
+    """
+    EDTA.pl \\
+        --genome $fasta \\
+        --threads $task.cpus \\
+        $cds_file \\
+        $curatedlib_file \\
+        $rmout_file \\
+        $exclude_file \\
+        $args \\
+        &> "${prefix}.log"
+
+    mv \\
+        "${mod_file_name}.EDTA.TElib.fa" \\
+        "${prefix}.EDTA.TElib.fa"
+
+    [ -f "${mod_file_name}.EDTA.raw/LTR/${mod_file_name}.pass.list" ] \\
+        && mv \\
+            "${mod_file_name}.EDTA.raw/LTR/${mod_file_name}.pass.list" \\
+            "${prefix}.EDTA.pass.list" \\
+        || echo "EDTA did not produce a pass.list file"
+
+    [ -f "${mod_file_name}.EDTA.anno/${mod_file_name}.out" ] \\
+        && mv \\
+            "${mod_file_name}.EDTA.anno/${mod_file_name}.out" \\
+            "${prefix}.EDTA.out" \\
+        || echo "EDTA did not produce an out file"
+
+    [ -f "${mod_file_name}.EDTA.TEanno.gff3" ] \\
+        && mv \\
+            "${mod_file_name}.EDTA.TEanno.gff3" \\
+            "${prefix}.EDTA.TEanno.gff3" \\
+        || echo "EDTA did not produce a TEanno gff3 file"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
+    END_VERSIONS
+    """
+
+    stub:
+    def args            = task.ext.args ?: ''
+    def prefix          = task.ext.prefix ?: "${meta.id}"
+    def touch_pass_list = args.contains("--anno 1") ? "touch ${prefix}.EDTA.pass.list"  : ''
+    def touch_out_file  = args.contains("--anno 1") ? "touch ${prefix}.EDTA.out"        : ''
+    def touch_te_anno   = args.contains("--anno 1") ? "touch ${prefix}.EDTA.TEanno.gff3": ''
+    """
+    touch "${prefix}.log"
+    touch "${prefix}.EDTA.TElib.fa"
+    $touch_pass_list
+    $touch_out_file
+    $touch_te_anno
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
+    END_VERSIONS
+    """
+}
diff --git a/modules/pfr/edta/edta/meta.yml b/modules/pfr/edta/edta/meta.yml
new file mode 100644
index 0000000..4d59fdf
--- /dev/null
+++ b/modules/pfr/edta/edta/meta.yml
@@ -0,0 +1,61 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "edta_edta"
+description: Extensive de-novo TE Annotator (EDTA)
+keywords:
+  - genome
+  - repeat
+  - annotation
+  - transposable-elements
+tools:
+  - "edta":
+      description: Extensive de-novo TE Annotator (EDTA)
+      homepage: "https://github.com/oushujun/EDTA"
+      documentation: "https://github.com/oushujun/EDTA"
+      tool_dev_url: "https://github.com/oushujun/EDTA"
+      doi: "10.1186/s13059-019-1905-y"
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test' ]`
+  - fasta:
+      type: file
+      description: Genome fasta file
+      pattern: "*.{fsa,fa,fasta}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test' ]`
+  - log:
+      type: file
+      description: Log emitted by EDTA
+      pattern: "*.log"
+  - te_lib_fasta:
+      type: file
+      description: A non-redundant TE library in fasta format
+      pattern: "*.EDTA.TElib.fa"
+  - pass_list:
+      type: file
+      description: A summary table of intact LTR-RTs with coordinate and structural information
+      pattern: "*.EDTA.pass.list"
+  - out_file:
+      type: file
+      description: RepeatMasker annotation of all LTR sequences in the genome
+      pattern: "*.EDTA.out"
+  - te_anno_gff3:
+      type: file
+      description: A gff3 file containing both structurally intact and fragmented TE annotations
+      pattern: "*.EDTA.TEanno.gff3"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@GallVp"
+maintainers:
+  - "@GallVp"
diff --git a/modules/pfr/edta/edta/tests/main.nf.test b/modules/pfr/edta/edta/tests/main.nf.test
new file mode 100644
index 0000000..d0a7142
--- /dev/null
+++ b/modules/pfr/edta/edta/tests/main.nf.test
@@ -0,0 +1,72 @@
+nextflow_process {
+
+    name "Test Process EDTA_EDTA"
+    script "../main.nf"
+    process "EDTA_EDTA"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "edta"
+    tag "edta/edta"
+
+    test("homo_sapiens-genome_fasta") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+                ]
+                input[1] = []
+                input[2] = []
+                input[3] = []
+                input[4] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+                ]
+                input[1] = []
+                input[2] = []
+                input[3] = []
+                input[4] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match("versions") },
+                { assert process.out.log != null },
+                { assert process.out.te_lib_fasta != null },
+                { assert process.out.pass_list != null },
+                { assert process.out.out_file != null },
+                { assert process.out.te_anno_gff3 != null }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/pfr/edta/edta/tests/nextflow.config b/modules/pfr/edta/edta/tests/nextflow.config
new file mode 100644
index 0000000..b20ca5e
--- /dev/null
+++ b/modules/pfr/edta/edta/tests/nextflow.config
@@ -0,0 +1,3 @@
+process {
+    ext.args = '--anno 1 --evaluate 1'
+}
diff --git a/modules/pfr/edta/edta/tests/tags.yml b/modules/pfr/edta/edta/tests/tags.yml
new file mode 100644
index 0000000..180ae6d
--- /dev/null
+++ b/modules/pfr/edta/edta/tests/tags.yml
@@ -0,0 +1,2 @@
+edta/edta:
+  - "modules/pfr/edta/edta/**"

From 26c33faddd8e133886dd330c372c19485b927e74 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 19 Dec 2023 10:31:05 +1300
Subject: [PATCH 26/59] Updated config

---
 conf/base.config               | 45 ++++++++++++++++++----------------
 conf/manifest.config           |  4 +--
 conf/reporting_defaults.config |  2 --
 pangene_pfr.sh                 |  4 +--
 4 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 54db554..54114d3 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -1,7 +1,11 @@
 profiles {
-    slurm {
+    pfr {
         process {
             executor        = 'slurm'
         }
+        // apptainer is a top-level config scope, so it sits beside process, not inside it
+        apptainer {
+            envWhitelist    = 'APPTAINER_BINDPATH,APPTAINER_BIND'
+        }
     }
     
@@ -10,10 +14,22 @@ profiles {
             executor        = 'local'
         }
     }
+
+    apptainer {
+        apptainer.enabled   = true
+        apptainer.autoMounts= true
+        apptainer.registry  = 'quay.io'
+    }
+
+    docker {
+        docker.enabled      = true
+        docker.userEmulation= false
+        docker.fixOwnership = true
+        docker.runOptions   = '--platform=linux/amd64'
+        docker.registry     = 'quay.io'
+    }
 }
 
-// Source: https://github.com/nf-core/rnaseq
-// License: https://github.com/nf-core/rnaseq/blob/master/LICENSE
 process {
 
     cpus                    = { check_max( 1    * task.attempt, 'cpus'   ) }
@@ -24,12 +40,6 @@ process {
     maxRetries              = 1
     maxErrors               = '-1'
 
-    // Process-specific resource requirements
-    // NOTE - Please try and re-use the labels below as much as possible.
-    //        These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
-    //        If possible, it would be nice to keep the same label naming convention when
-    //        adding in your local modules too.
-    // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
     withLabel:process_single {
         cpus                = { check_max( 1                  , 'cpus'     ) }
         memory              = { check_max( 6.GB * task.attempt, 'memory'   ) }
@@ -53,19 +63,14 @@ process {
     withLabel:process_long {
         time                = { check_max( 20.h  * task.attempt, 'time'    ) }
     }
-    withLabel:process_week_long {
-        time                = { check_max( 7.days  * task.attempt, 'time'  ) }
-    }
     withLabel:process_high_memory {
         memory              = { check_max( 200.GB * task.attempt, 'memory' ) }
     }
-}
-
-apptainer {
-    enabled                 = true
-    autoMounts              = true
-    envWhitelist            = "APPTAINER_BINDPATH,APPTAINER_BIND"
-    registry                = 'quay.io'
+    
+    // Custom
+    withLabel:process_week_long {
+        time                = { check_max( 7.days  * task.attempt, 'time'  ) }
+    }
 }
 
 nextflow {
@@ -74,8 +79,6 @@ nextflow {
     }
 }
 
-// Source: https://github.com/nf-core/rnaseq
-// License: https://github.com/nf-core/rnaseq/blob/master/LICENSE
 def check_max(obj, type) {
     if (type == 'memory') {
         try {
diff --git a/conf/manifest.config b/conf/manifest.config
index 7bf1f6b..706052c 100644
--- a/conf/manifest.config
+++ b/conf/manifest.config
@@ -1,10 +1,10 @@
 manifest {
-    name                    = 'pan-gene'
+    name                    = 'pangene'
     author                  = """Usman Rashid"""
     homePage                = 'https://github.com/PlantandFoodResearch/pan-gene'
     description             = """A NextFlow pipeline for pan-genome annotation"""
     mainScript              = 'main.nf'
-    nextflowVersion         = '!>=22.10.4'
+    nextflowVersion         = '!>=23.04.4'
     version                 = '0.1'
     doi                     = ''
 }
\ No newline at end of file
diff --git a/conf/reporting_defaults.config b/conf/reporting_defaults.config
index c85d378..5df9469 100644
--- a/conf/reporting_defaults.config
+++ b/conf/reporting_defaults.config
@@ -1,5 +1,3 @@
-// Source: https://github.com/nf-core/rnaseq
-// License: https://github.com/nf-core/rnaseq/blob/master/LICENSE
 def trace_timestamp         = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
 timeline {
     enabled                 = true
diff --git a/pangene_pfr.sh b/pangene_pfr.sh
index 3b048df..ab3d262 100644
--- a/pangene_pfr.sh
+++ b/pangene_pfr.sh
@@ -13,7 +13,7 @@
 ml apptainer/1.1
 ml nextflow/23.04.4
 
-export APPTAINER_BINDPATH="$APPTAINER_BINDPATH,/workspace/$USER/tmp:/tmp"
 export TMPDIR="/workspace/$USER/tmp"
+export APPTAINER_BINDPATH="$APPTAINER_BINDPATH,$TMPDIR:$TMPDIR,$TMPDIR:/tmp"
 
-nextflow main.nf -profile slurm -resume
\ No newline at end of file
+nextflow main.nf -profile pfr,apptainer -resume
\ No newline at end of file

From 9e5831471f6c25e39fd564f1a21eaace4b2a6b37 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 19 Dec 2023 11:50:16 +1300
Subject: [PATCH 27/59] Integrated fastavalidator

---
 TODO.md                                |  12 +-
 modules.json                           | 291 ++++++++++++++-----------
 subworkflows/local/prepare_assembly.nf |  20 +-
 3 files changed, 190 insertions(+), 133 deletions(-)

diff --git a/TODO.md b/TODO.md
index 8c90b99..0134c26 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,4 +2,14 @@
 - [ ] From Ross regarding post-processing:
 
 > [9:49 am] Ross Crowhurst
-Here is an easy one: BLATSp vs swissprot & Arabidpsis and check query is with set thresholds of reference - if so accept; If not move to BLASTp vs Uniref90 or Refeq (or some other predetermined model species) - same deal accept if within threshold limits. Else BLASTn of cds vs NCBI nt (really scrapping the bottom of the barrel here). If not a hit to anything then chances are its garbage and should be removed. Some ppl might try to claim its a unique protein to the genotype but in 20 years I have never seen one of those be supported - mostly this category is garbage. The screen agains NCBI nt also assists to classify "bits" as well retroposonss etc. Idea being you want to remove garbage predictions - as this does take time you can see why some papers just filter out by size.
\ No newline at end of file
+Here is an easy one: BLATSp vs swissprot & Arabidpsis and check query is with set thresholds of reference - if so accept; If not move to BLASTp vs Uniref90 or Refeq (or some other predetermined model species) - same deal accept if within threshold limits. Else BLASTn of cds vs NCBI nt (really scrapping the bottom of the barrel here). If not a hit to anything then chances are its garbage and should be removed. Some ppl might try to claim its a unique protein to the genotype but in 20 years I have never seen one of those be supported - mostly this category is garbage. The screen agains NCBI nt also assists to classify "bits" as well retroposonss etc. Idea being you want to remove garbage predictions - as this does take time you can see why some papers just filter out by size.
+
+- [ ] From Cecilia:
+
+> https://github.com/zhaotao1987/SynNet-Pipeline
+
+- [ ] From Ross:
+
+> https://www.biorxiv.org/content/10.1101/096529v2.full.pdf
+
+> Don't use `-exclude_partial`
\ No newline at end of file
diff --git a/modules.json b/modules.json
index fa42bdf..0119dda 100644
--- a/modules.json
+++ b/modules.json
@@ -1,128 +1,169 @@
 {
-  "name": "PlantandFoodResearch/pangene",
-  "homePage": "https://github.com/PlantandFoodResearch/pangene",
-  "repos": {
-    "git@github.com:PlantandFoodResearch/nxf-modules.git": {
-      "modules": {
-        "pfr": {
-          "custom/restoregffids": {
-            "branch": "main",
-            "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
-            "installed_by": ["modules"]
-          },
-          "custom/shortenfastaids": {
-            "branch": "main",
-            "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
-            "installed_by": ["modules"]
-          },
-          "edta/edta": {
-            "branch": "main",
-            "git_sha": "d14b1f1d790cc01c11f8ec5aa80a9562c6808b20",
-            "installed_by": ["modules"]
-          },
-          "liftoff": {
-            "branch": "main",
-            "git_sha": "3593ec100c92b656204bf739a51d62fd44d81f6f",
-            "installed_by": ["modules"]
-          }
+    "name": "PlantandFoodResearch/pangene",
+    "homePage": "https://github.com/PlantandFoodResearch/pangene",
+    "repos": {
+        "git@github.com:PlantandFoodResearch/nxf-modules.git": {
+            "modules": {
+                "pfr": {
+                    "custom/restoregffids": {
+                        "branch": "main",
+                        "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "custom/shortenfastaids": {
+                        "branch": "main",
+                        "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "edta/edta": {
+                        "branch": "main",
+                        "git_sha": "d14b1f1d790cc01c11f8ec5aa80a9562c6808b20",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "liftoff": {
+                        "branch": "main",
+                        "git_sha": "3593ec100c92b656204bf739a51d62fd44d81f6f",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    }
+                }
+            }
+        },
+        "git@github.com:kherronism/nf-modules.git": {
+            "modules": {
+                "kherronism": {
+                    "braker3": {
+                        "branch": "dev",
+                        "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "repeatmasker": {
+                        "branch": "dev",
+                        "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    }
+                }
+            }
+        },
+        "https://github.com/nf-core/modules.git": {
+            "modules": {
+                "nf-core": {
+                    "cat/cat": {
+                        "branch": "master",
+                        "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "cat/fastq": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "custom/dumpsoftwareversions": {
+                        "branch": "master",
+                        "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "fastavalidator": {
+                        "branch": "master",
+                        "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "fastp": {
+                        "branch": "master",
+                        "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde",
+                        "installed_by": [
+                            "fastq_fastqc_umitools_fastp"
+                        ]
+                    },
+                    "fastqc": {
+                        "branch": "master",
+                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
+                        "installed_by": [
+                            "fastq_fastqc_umitools_fastp",
+                            "modules"
+                        ]
+                    },
+                    "gffread": {
+                        "branch": "master",
+                        "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "gunzip": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "samtools/cat": {
+                        "branch": "master",
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "sortmerna": {
+                        "branch": "master",
+                        "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "star/align": {
+                        "branch": "master",
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "star/genomegenerate": {
+                        "branch": "master",
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "umitools/extract": {
+                        "branch": "master",
+                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
+                        "installed_by": [
+                            "fastq_fastqc_umitools_fastp"
+                        ]
+                    }
+                }
+            },
+            "subworkflows": {
+                "nf-core": {
+                    "fastq_fastqc_umitools_fastp": {
+                        "branch": "master",
+                        "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
+                        "installed_by": [
+                            "subworkflows"
+                        ]
+                    }
+                }
+            }
         }
-      }
-    },
-    "git@github.com:kherronism/nf-modules.git": {
-      "modules": {
-        "kherronism": {
-          "braker3": {
-            "branch": "dev",
-            "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
-            "installed_by": ["modules"]
-          },
-          "repeatmasker": {
-            "branch": "dev",
-            "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
-            "installed_by": ["modules"]
-          }
-        }
-      }
-    },
-    "https://github.com/nf-core/modules.git": {
-      "modules": {
-        "nf-core": {
-          "cat/cat": {
-            "branch": "master",
-            "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
-            "installed_by": ["modules"]
-          },
-          "cat/fastq": {
-            "branch": "master",
-            "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-            "installed_by": ["modules"]
-          },
-          "custom/dumpsoftwareversions": {
-            "branch": "master",
-            "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
-            "installed_by": ["modules"]
-          },
-          "fastavalidator": {
-            "branch": "master",
-            "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382",
-            "installed_by": ["modules"]
-          },
-          "fastp": {
-            "branch": "master",
-            "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde",
-            "installed_by": ["fastq_fastqc_umitools_fastp"]
-          },
-          "fastqc": {
-            "branch": "master",
-            "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
-            "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
-          },
-          "gffread": {
-            "branch": "master",
-            "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8",
-            "installed_by": ["modules"]
-          },
-          "gunzip": {
-            "branch": "master",
-            "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-            "installed_by": ["modules"]
-          },
-          "samtools/cat": {
-            "branch": "master",
-            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-            "installed_by": ["modules"]
-          },
-          "sortmerna": {
-            "branch": "master",
-            "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
-            "installed_by": ["modules"]
-          },
-          "star/align": {
-            "branch": "master",
-            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-            "installed_by": ["modules"]
-          },
-          "star/genomegenerate": {
-            "branch": "master",
-            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-            "installed_by": ["modules"]
-          },
-          "umitools/extract": {
-            "branch": "master",
-            "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
-            "installed_by": ["fastq_fastqc_umitools_fastp"]
-          }
-        }
-      },
-      "subworkflows": {
-        "nf-core": {
-          "fastq_fastqc_umitools_fastp": {
-            "branch": "master",
-            "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
-            "installed_by": ["subworkflows"]
-          }
-        }
-      }
     }
-  }
-}
+}
\ No newline at end of file
diff --git a/subworkflows/local/prepare_assembly.nf b/subworkflows/local/prepare_assembly.nf
index 7469afc..05bd1ec 100644
--- a/subworkflows/local/prepare_assembly.nf
+++ b/subworkflows/local/prepare_assembly.nf
@@ -1,11 +1,9 @@
 include { GUNZIP as GUNZIP_TARGET_ASSEMBLY      } from '../../modules/nf-core/gunzip'
 include { GUNZIP as GUNZIP_TE_LIBRARY           } from '../../modules/nf-core/gunzip'
-include { FASTA_VALIDATE                        } from '../../modules/local/fasta_validate'
+include { FASTAVALIDATOR                        } from '../../modules/nf-core/fastavalidator'
 include { REPEATMASKER                          } from '../../modules/kherronism/repeatmasker'
 include { STAR_GENOMEGENERATE                   } from '../../modules/nf-core/star/genomegenerate'
 
-include { FASTA_EDTA                            } from '../../subworkflows/local/fasta_edta'
-
 workflow PREPARE_ASSEMBLY {
     take:
     target_assembly     // channel: [ meta, fasta ]
@@ -29,11 +27,19 @@ workflow PREPARE_ASSEMBLY {
     )
     | set { ch_gunzip_target_assembly }
 
-    // MODULE: FASTA_VALIDATE
-    FASTA_VALIDATE(ch_gunzip_target_assembly)
-    .valid_fasta
+    // MODULE: FASTAVALIDATOR
+    FASTAVALIDATOR(ch_gunzip_target_assembly)
+
+    ch_gunzip_target_assembly
+    | join(FASTAVALIDATOR.out.success_log)
+    | map { meta, fasta, log -> [ meta, fasta ] }
     | set { ch_validated_target_assembly }
 
+    FASTAVALIDATOR.out.error_log
+    | map { meta, log ->
+        System.err.println("WARNING: FASTAVALIDATOR failed for ${meta.id} with error: ${log}. ${meta.id} is excluded from further analysis.")
+    }
+
     // MODULE: GUNZIP_TE_LIBRARY
     te_library
     | branch { meta, file ->
@@ -85,7 +91,7 @@ workflow PREPARE_ASSEMBLY {
     | set { ch_assembly_index }
 
     Channel.empty()
-    | mix(FASTA_VALIDATE.out.versions.first())
+    | mix(FASTAVALIDATOR.out.versions.first())
     | mix(GUNZIP_TE_LIBRARY.out.versions.first())
     | mix(FASTA_EDTA.out.versions)
     | mix(REPEATMASKER.out.versions.first())

From 4534684f412accaef7dd8213a9a7351549e5e0a0 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 19 Dec 2023 12:12:52 +1300
Subject: [PATCH 28/59] Added patch for star/genomegenerate

---
 modules.json                                  |   3 +-
 .../star/genomegenerate/environment.yml       |   4 +-
 modules/nf-core/star/genomegenerate/main.nf   |  83 +++---
 .../genomegenerate/star-genomegenerate.diff   | 247 ++++++++++++++++++
 .../star/genomegenerate/tests/main.nf.test    |  81 +++++-
 .../genomegenerate/tests/main.nf.test.snap    |  14 +-
 6 files changed, 395 insertions(+), 37 deletions(-)
 create mode 100644 modules/nf-core/star/genomegenerate/star-genomegenerate.diff

diff --git a/modules.json b/modules.json
index 0119dda..80995ba 100644
--- a/modules.json
+++ b/modules.json
@@ -142,7 +142,8 @@
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
                         "installed_by": [
                             "modules"
-                        ]
+                        ],
+                        "patch": "modules/nf-core/star/genomegenerate/star-genomegenerate.diff"
                     },
                     "umitools/extract": {
                         "branch": "master",
diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml
index 350a459..93e4476 100644
--- a/modules/nf-core/star/genomegenerate/environment.yml
+++ b/modules/nf-core/star/genomegenerate/environment.yml
@@ -1,9 +1,11 @@
 name: star_genomegenerate
+
 channels:
   - conda-forge
   - bioconda
   - defaults
+
 dependencies:
-  - bioconda::star=2.7.10a
   - bioconda::samtools=1.18
+  - bioconda::star=2.7.10a
   - conda-forge::gawk=5.1.0
diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf
index 2bc3e29..b885571 100644
--- a/modules/nf-core/star/genomegenerate/main.nf
+++ b/modules/nf-core/star/genomegenerate/main.nf
@@ -19,9 +19,10 @@ process STAR_GENOMEGENERATE {
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
-    def args_list = args.tokenize()
-    def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
+    def args        = task.ext.args ?: ''
+    def args_list   = args.tokenize()
+    def memory      = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
+    def include_gtf = gtf ? "--sjdbGTFfile $gtf" : ''
     if (args_list.contains('--genomeSAindexNbases')) {
         """
         mkdir star
@@ -29,7 +30,7 @@ process STAR_GENOMEGENERATE {
             --runMode genomeGenerate \\
             --genomeDir star/ \\
             --genomeFastaFiles $fasta \\
-            --sjdbGTFfile $gtf \\
+            $include_gtf \\
             --runThreadN $task.cpus \\
             $memory \\
             $args
@@ -51,7 +52,7 @@ process STAR_GENOMEGENERATE {
             --runMode genomeGenerate \\
             --genomeDir star/ \\
             --genomeFastaFiles $fasta \\
-            --sjdbGTFfile $gtf \\
+            $include_gtf \\
             --runThreadN $task.cpus \\
             --genomeSAindexNbases \$NUM_BASES \\
             $memory \\
@@ -67,30 +68,52 @@ process STAR_GENOMEGENERATE {
     }
 
     stub:
-    """
-    mkdir star
-    touch star/Genome
-    touch star/Log.out
-    touch star/SA
-    touch star/SAindex
-    touch star/chrLength.txt
-    touch star/chrName.txt
-    touch star/chrNameLength.txt
-    touch star/chrStart.txt
-    touch star/exonGeTrInfo.tab
-    touch star/exonInfo.tab
-    touch star/geneInfo.tab
-    touch star/genomeParameters.txt
-    touch star/sjdbInfo.txt
-    touch star/sjdbList.fromGTF.out.tab
-    touch star/sjdbList.out.tab
-    touch star/transcriptInfo.tab
+    if (gtf) {
+        """
+        mkdir star
+        touch star/Genome
+        touch star/Log.out
+        touch star/SA
+        touch star/SAindex
+        touch star/chrLength.txt
+        touch star/chrName.txt
+        touch star/chrNameLength.txt
+        touch star/chrStart.txt
+        touch star/exonGeTrInfo.tab
+        touch star/exonInfo.tab
+        touch star/geneInfo.tab
+        touch star/genomeParameters.txt
+        touch star/sjdbInfo.txt
+        touch star/sjdbList.fromGTF.out.tab
+        touch star/sjdbList.out.tab
+        touch star/transcriptInfo.tab
 
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        star: \$(STAR --version | sed -e "s/STAR_//g")
-        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
-        gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
-    END_VERSIONS
-    """
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            star: \$(STAR --version | sed -e "s/STAR_//g")
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+            gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+        END_VERSIONS
+        """
+    } else {
+        """
+        mkdir star
+        touch star/Genome
+        touch star/Log.out
+        touch star/SA
+        touch star/SAindex
+        touch star/chrLength.txt
+        touch star/chrName.txt
+        touch star/chrNameLength.txt
+        touch star/chrStart.txt
+        touch star/genomeParameters.txt
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            star: \$(STAR --version | sed -e "s/STAR_//g")
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+            gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+        END_VERSIONS
+        """
+    }
 }
diff --git a/modules/nf-core/star/genomegenerate/star-genomegenerate.diff b/modules/nf-core/star/genomegenerate/star-genomegenerate.diff
new file mode 100644
index 0000000..0181f46
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/star-genomegenerate.diff
@@ -0,0 +1,247 @@
+Changes in module 'nf-core/star/genomegenerate'
+--- modules/nf-core/star/genomegenerate/environment.yml
++++ modules/nf-core/star/genomegenerate/environment.yml
+@@ -1,9 +1,11 @@
+ name: star_genomegenerate
++
+ channels:
+   - conda-forge
+   - bioconda
+   - defaults
++
+ dependencies:
++  - bioconda::samtools=1.18
+   - bioconda::star=2.7.10a
+-  - bioconda::samtools=1.18
+   - conda-forge::gawk=5.1.0
+
+--- modules/nf-core/star/genomegenerate/main.nf
++++ modules/nf-core/star/genomegenerate/main.nf
+@@ -19,9 +19,10 @@
+     task.ext.when == null || task.ext.when
+ 
+     script:
+-    def args = task.ext.args ?: ''
+-    def args_list = args.tokenize()
+-    def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
++    def args        = task.ext.args ?: ''
++    def args_list   = args.tokenize()
++    def memory      = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
++    def include_gtf = gtf ? "--sjdbGTFfile $gtf" : ''
+     if (args_list.contains('--genomeSAindexNbases')) {
+         """
+         mkdir star
+@@ -29,7 +30,7 @@
+             --runMode genomeGenerate \\
+             --genomeDir star/ \\
+             --genomeFastaFiles $fasta \\
+-            --sjdbGTFfile $gtf \\
++            $include_gtf \\
+             --runThreadN $task.cpus \\
+             $memory \\
+             $args
+@@ -51,7 +52,7 @@
+             --runMode genomeGenerate \\
+             --genomeDir star/ \\
+             --genomeFastaFiles $fasta \\
+-            --sjdbGTFfile $gtf \\
++            $include_gtf \\
+             --runThreadN $task.cpus \\
+             --genomeSAindexNbases \$NUM_BASES \\
+             $memory \\
+@@ -67,30 +68,52 @@
+     }
+ 
+     stub:
+-    """
+-    mkdir star
+-    touch star/Genome
+-    touch star/Log.out
+-    touch star/SA
+-    touch star/SAindex
+-    touch star/chrLength.txt
+-    touch star/chrName.txt
+-    touch star/chrNameLength.txt
+-    touch star/chrStart.txt
+-    touch star/exonGeTrInfo.tab
+-    touch star/exonInfo.tab
+-    touch star/geneInfo.tab
+-    touch star/genomeParameters.txt
+-    touch star/sjdbInfo.txt
+-    touch star/sjdbList.fromGTF.out.tab
+-    touch star/sjdbList.out.tab
+-    touch star/transcriptInfo.tab
++    if (gtf) {
++        """
++        mkdir star
++        touch star/Genome
++        touch star/Log.out
++        touch star/SA
++        touch star/SAindex
++        touch star/chrLength.txt
++        touch star/chrName.txt
++        touch star/chrNameLength.txt
++        touch star/chrStart.txt
++        touch star/exonGeTrInfo.tab
++        touch star/exonInfo.tab
++        touch star/geneInfo.tab
++        touch star/genomeParameters.txt
++        touch star/sjdbInfo.txt
++        touch star/sjdbList.fromGTF.out.tab
++        touch star/sjdbList.out.tab
++        touch star/transcriptInfo.tab
+ 
+-    cat <<-END_VERSIONS > versions.yml
+-    "${task.process}":
+-        star: \$(STAR --version | sed -e "s/STAR_//g")
+-        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+-        gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+-    END_VERSIONS
+-    """
++        cat <<-END_VERSIONS > versions.yml
++        "${task.process}":
++            star: \$(STAR --version | sed -e "s/STAR_//g")
++            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
++            gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
++        END_VERSIONS
++        """
++    } else {
++        """
++        mkdir star
++        touch star/Genome
++        touch star/Log.out
++        touch star/SA
++        touch star/SAindex
++        touch star/chrLength.txt
++        touch star/chrName.txt
++        touch star/chrNameLength.txt
++        touch star/chrStart.txt
++        touch star/genomeParameters.txt
++
++        cat <<-END_VERSIONS > versions.yml
++        "${task.process}":
++            star: \$(STAR --version | sed -e "s/STAR_//g")
++            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
++            gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
++        END_VERSIONS
++        """
++    }
+ }
+
+--- modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
++++ modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
+@@ -5,12 +5,18 @@
+                 "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+             ]
+         ],
+-        "timestamp": "2023-12-04T18:01:27.298248806"
++        "timestamp": "2023-12-19T11:05:51.741109"
+     },
+-    "index": {
++    "index_with_gtf": {
+         "content": [
+-            "star"
++            "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]"
+         ],
+-        "timestamp": "2023-11-23T11:31:47.560528"
++        "timestamp": "2023-12-19T11:38:14.551548"
++    },
++    "index_without_gtf": {
++        "content": [
++            "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]"
++        ],
++        "timestamp": "2023-12-19T11:38:22.382905"
+     }
+ }
+--- modules/nf-core/star/genomegenerate/tests/main.nf.test
++++ modules/nf-core/star/genomegenerate/tests/main.nf.test
+@@ -28,7 +28,86 @@
+         then {
+             assertAll(
+                 { assert process.success },
+-                { assert snapshot(file(process.out.index[0][1]).name).match("index") },
++                { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") },
++                { assert snapshot(process.out.versions).match("versions") }
++            )
++        }
++
++    }
++
++    test("homo_sapiens-stub") {
++
++        options '-stub'
++
++        when {
++            process {
++                """
++                input[0] = Channel.of([
++                    [ id:'test_fasta' ],
++                    [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
++                ])
++                input[1] = Channel.of([
++                    [ id:'test_gtf' ],
++                    [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
++                ])
++                """
++            }
++        }
++
++        then {
++            assertAll(
++                { assert process.success },
++                { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") },
++                { assert snapshot(process.out.versions).match("versions") }
++            )
++        }
++
++    }
++
++    test("homo_sapiens-without_gtf") {
++
++        when {
++            process {
++                """
++                input[0] = Channel.of([
++                    [ id:'test_fasta' ],
++                    [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
++                ])
++                input[1] = Channel.of([ [], [] ])
++                """
++            }
++        }
++
++        then {
++            assertAll(
++                { assert process.success },
++                { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") },
++                { assert snapshot(process.out.versions).match("versions") }
++            )
++        }
++
++    }
++
++    test("homo_sapiens-without_gtf-stub") {
++
++        options '-stub'
++
++        when {
++            process {
++                """
++                input[0] = Channel.of([
++                    [ id:'test_fasta' ],
++                    [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
++                ])
++                input[1] = Channel.of([ [], [] ])
++                """
++            }
++        }
++
++        then {
++            assertAll(
++                { assert process.success },
++                { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") },
+                 { assert snapshot(process.out.versions).match("versions") }
+             )
+         }
+
+************************************************************
diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test
index eed8292..af0c942 100644
--- a/modules/nf-core/star/genomegenerate/tests/main.nf.test
+++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test
@@ -28,7 +28,86 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(file(process.out.index[0][1]).name).match("index") },
+                { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("homo_sapiens-stub") {
+
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test_fasta' ],
+                    [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+                ])
+                input[1] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("homo_sapiens-without_gtf") {
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test_fasta' ],
+                    [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+                ])
+                input[1] = Channel.of([ [], [] ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("homo_sapiens-without_gtf-stub") {
+
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test_fasta' ],
+                    [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+                ])
+                input[1] = Channel.of([ [], [] ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") },
                 { assert snapshot(process.out.versions).match("versions") }
             )
         }
diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
index e7bb6ee..9de08c7 100644
--- a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
+++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
@@ -5,12 +5,18 @@
                 "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
             ]
         ],
-        "timestamp": "2023-12-04T18:01:27.298248806"
+        "timestamp": "2023-12-19T11:05:51.741109"
     },
-    "index": {
+    "index_with_gtf": {
         "content": [
-            "star"
+            "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]"
         ],
-        "timestamp": "2023-11-23T11:31:47.560528"
+        "timestamp": "2023-12-19T11:38:14.551548"
+    },
+    "index_without_gtf": {
+        "content": [
+            "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]"
+        ],
+        "timestamp": "2023-12-19T11:38:22.382905"
     }
 }
\ No newline at end of file

From ec7ffc1b050d0f301baefed276a768d2f71913aa Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 19 Dec 2023 15:52:18 +1300
Subject: [PATCH 29/59] Incorporated fasta_edta_lai

---
 conf/modules.config                           |  10 +-
 modules.json                                  |  21 ++
 modules/pfr/lai/environment.yml               |   9 +
 modules/pfr/lai/main.nf                       |  69 +++++++
 modules/pfr/lai/meta.yml                      |  68 +++++++
 modules/pfr/lai/tests/main.nf.test            | 120 +++++++++++
 modules/pfr/lai/tests/main.nf.test.snap       |  10 +
 modules/pfr/lai/tests/nextflow.config         |  10 +
 modules/pfr/lai/tests/tags.yml                |   2 +
 nextflow.config                               |   8 +-
 subworkflows/local/fasta_edta.nf              |  43 ----
 subworkflows/local/prepare_assembly.nf        |  19 +-
 subworkflows/pfr/fasta_edta_lai/main.nf       |  88 ++++++++
 subworkflows/pfr/fasta_edta_lai/meta.yml      |  69 +++++++
 .../pfr/fasta_edta_lai/tests/main.nf.test     |  38 ++++
 .../pfr/fasta_edta_lai/tests/tags.yml         |   2 +
 workflows/pangene.nf                          | 188 +++++++++---------
 17 files changed, 621 insertions(+), 153 deletions(-)
 create mode 100644 modules/pfr/lai/environment.yml
 create mode 100644 modules/pfr/lai/main.nf
 create mode 100644 modules/pfr/lai/meta.yml
 create mode 100644 modules/pfr/lai/tests/main.nf.test
 create mode 100644 modules/pfr/lai/tests/main.nf.test.snap
 create mode 100644 modules/pfr/lai/tests/nextflow.config
 create mode 100644 modules/pfr/lai/tests/tags.yml
 delete mode 100644 subworkflows/local/fasta_edta.nf
 create mode 100644 subworkflows/pfr/fasta_edta_lai/main.nf
 create mode 100644 subworkflows/pfr/fasta_edta_lai/meta.yml
 create mode 100644 subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
 create mode 100644 subworkflows/pfr/fasta_edta_lai/tests/tags.yml

diff --git a/conf/modules.config b/conf/modules.config
index 58830e9..392583a 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -1,18 +1,16 @@
 process {
-    withName: 'EDTA' {
+    withName: 'EDTA_EDTA' {
         ext.args = [
             params.edta_is_sensitive ? "--sensitive 1" :  "--sensitive 0",
             "--anno 0",
             "--force 1"
         ].join(' ').trim()
-    }
 
-    withName: 'RESTORE_EDTA_IDS' {
         publishDir = [
-            path: { "${params.outdir}/edta/${meta.id}" },
+            path: { "${params.outdir}/edta" },
             mode: "copy",
-            saveAs: { filename -> filename.equals("versions.yml") ? null : filename },
-            enabled: params.edta_save_outputs
+            pattern: '*.EDTA.TElib.fa',
+            enabled: params.edta_save_te_lib
         ]
     }
 
diff --git a/modules.json b/modules.json
index 80995ba..b57ef90 100644
--- a/modules.json
+++ b/modules.json
@@ -9,6 +9,7 @@
                         "branch": "main",
                         "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
                         "installed_by": [
+                            "fasta_edta_lai",
                             "modules"
                         ]
                     },
@@ -16,6 +17,7 @@
                         "branch": "main",
                         "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
                         "installed_by": [
+                            "fasta_edta_lai",
                             "modules"
                         ]
                     },
@@ -23,9 +25,17 @@
                         "branch": "main",
                         "git_sha": "d14b1f1d790cc01c11f8ec5aa80a9562c6808b20",
                         "installed_by": [
+                            "fasta_edta_lai",
                             "modules"
                         ]
                     },
+                    "lai": {
+                        "branch": "main",
+                        "git_sha": "7e6e3cb41362a045c6bb6065903efa0eba246e87",
+                        "installed_by": [
+                            "fasta_edta_lai"
+                        ]
+                    },
                     "liftoff": {
                         "branch": "main",
                         "git_sha": "3593ec100c92b656204bf739a51d62fd44d81f6f",
@@ -34,6 +44,17 @@
                         ]
                     }
                 }
+            },
+            "subworkflows": {
+                "pfr": {
+                    "fasta_edta_lai": {
+                        "branch": "main",
+                        "git_sha": "fde2f37998ba54ec9c0b5cf65a2f28f14af981b0",
+                        "installed_by": [
+                            "subworkflows"
+                        ]
+                    }
+                }
             }
         },
         "git@github.com:kherronism/nf-modules.git": {
diff --git a/modules/pfr/lai/environment.yml b/modules/pfr/lai/environment.yml
new file mode 100644
index 0000000..94fadbd
--- /dev/null
+++ b/modules/pfr/lai/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "lai"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::LTR_retriever=2.9.0"
diff --git a/modules/pfr/lai/main.nf b/modules/pfr/lai/main.nf
new file mode 100644
index 0000000..d4fced9
--- /dev/null
+++ b/modules/pfr/lai/main.nf
@@ -0,0 +1,69 @@
+process LAI {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ltr_retriever:2.9.0--hdfd78af_2':
+        'biocontainers/ltr_retriever:2.9.0--hdfd78af_2' }"
+
+    input:
+    tuple val(meta), path(fasta)
+    path pass_list
+    path annotation_out
+    path monoploid_seqs
+
+    output:
+    tuple val(meta), path("*.LAI.log")  , emit: log
+    tuple val(meta), path("*.LAI.out")  , emit: lai_out     , optional: true
+    path "versions.yml"                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args            = task.ext.args     ?: ''
+    def prefix          = task.ext.prefix   ?: "${meta.id}"
+    def monoploid_param = monoploid_seqs    ? "-mono $monoploid_seqs"                       : ''
+    def lai_output_name = monoploid_seqs    ? "${annotation_out}.${monoploid_seqs}.out.LAI" : "${annotation_out}.LAI"
+    """
+    # Remove comments from genome fasta,
+    # otherwise LAI triggers its sequence name change logic
+
+    sed \\
+        '/^>/ s/\\s.*\$//' \\
+        $fasta \\
+        > for_lai_no_comments.fsa
+
+    LAI \\
+        -genome for_lai_no_comments.fsa \\
+        -intact $pass_list \\
+        -all $annotation_out \\
+        -t $task.cpus \\
+        $monoploid_param \\
+        $args \\
+        > "${prefix}.LAI.log"
+
+    mv \\
+        $lai_output_name \\
+        "${prefix}.LAI.out" \\
+        || echo "LAI did not produce the output file"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        lai: \$(cat /usr/local/share/LTR_retriever/LAI | grep "my \\\$version" | sed 's/my \$version="//; s/";//')
+    END_VERSIONS
+    """
+
+    stub:
+    def args    = task.ext.args ?: ''
+    def prefix  = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.LAI.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        lai: \$(cat /usr/local/share/LTR_retriever/LAI | grep "my \\\$version" | sed 's/my \$version="//; s/";//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/pfr/lai/meta.yml b/modules/pfr/lai/meta.yml
new file mode 100644
index 0000000..6fd7aef
--- /dev/null
+++ b/modules/pfr/lai/meta.yml
@@ -0,0 +1,68 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "lai"
+description: Estimates the mean LTR sequence identity in the genome
+keywords:
+  - genomics
+  - annotation
+  - repeat
+  - long terminal retrotransposon
+  - retrotransposon
+  - stats
+  - qc
+tools:
+  - "lai":
+      description: Assessing genome assembly quality using the LTR Assembly Index (LAI)
+      homepage: "https://github.com/oushujun/LTR_retriever"
+      documentation: "https://github.com/oushujun/LTR_retriever"
+      tool_dev_url: "https://github.com/oushujun/LTR_retriever"
+      doi: "10.1093/nar/gky730"
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - fasta:
+      type: file
+      description: The genome file that is used to generate everything
+      pattern: "*.{fsa,fa,fasta}"
+  - pass_list:
+      type: file
+      description: A list of intact LTR-RTs generated by LTR_retriever
+      pattern: "*.pass.list"
+  - annotation_out:
+      type: file
+      description: RepeatMasker annotation of all LTR sequences in the genome
+      pattern: "*.out"
+  - monoploid_seqs:
+      type: file
+      description: |
+        This parameter is mainly for polyploid genomes. User provides a list of
+        sequence names that represent a monoploid (1x). LAI will be calculated only
+        on these sequences if provided.
+      pattern: "*.txt"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - log:
+      type: file
+      description: Log from LAI
+      pattern: "*.LAI.log"
+  - lai_out:
+      type: file
+      description: |
+        Output file from LAI if LAI is able to estimate the index from the inputs
+      pattern: "*.LAI.out"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@GallVp"
+maintainers:
+  - "@GallVp"
diff --git a/modules/pfr/lai/tests/main.nf.test b/modules/pfr/lai/tests/main.nf.test
new file mode 100644
index 0000000..353043c
--- /dev/null
+++ b/modules/pfr/lai/tests/main.nf.test
@@ -0,0 +1,120 @@
+nextflow_process {
+
+    name "Test Process LAI"
+    script "../main.nf"
+    process "LAI"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "lai"
+    tag "gt/suffixerator"
+    tag "nf-core/gunzip"
+    tag "gt/ltrharvest"
+    tag "ltrretriever"
+
+    test("homo_sapiens-genome_21_fasta-success") {
+
+        setup {
+            run("GUNZIP") {
+                script "../../../nf-core/gunzip"
+
+                process {
+                    """
+                    input[0] = [
+                        [ id:'test' ],
+                        file('/Users/hrauxr/Projects/nxf-modules/tests/data/chr1.fa.gz', checkIfExists: true)
+                    ]
+                    """
+                }
+            }
+
+            run("GT_SUFFIXERATOR") {
+                script "../../../pfr/gt/suffixerator"
+
+                process {
+                    """
+                    input[0] = GUNZIP.out.gunzip
+                    """
+                }
+            }
+
+            run("GT_LTRHARVEST") {
+                script "../../../pfr/gt/ltrharvest"
+
+                process {
+                    """
+                    input[0] = GT_SUFFIXERATOR.out.index
+                    """
+                }
+            }
+
+            run("LTRRETRIEVER") {
+                script "../../../pfr/ltrretriever"
+
+                process {
+                    """
+                    input[0] = GUNZIP.out.gunzip
+                    input[1] = GT_LTRHARVEST.out.tabout.map { meta, tabout -> tabout }
+                    input[2] = []
+                    input[3] = []
+                    input[4] = []
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = GUNZIP.out.gunzip
+                input[1] = LTRRETRIEVER.out.pass_list.map { meta, pass_list -> pass_list }
+                input[2] = LTRRETRIEVER.out.annotation_out.map { meta, annotation_out -> annotation_out }
+                input[3] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert path(process.out.log.get(0).get(1)).getText().contains("Dependency checking: Passed!") },
+                { assert path(process.out.log.get(0).get(1)).getText().contains("Calculate LAI:") },
+                { assert path(process.out.log.get(0).get(1)).getText().contains("Total LTR sequence content (0%) is too low for accurate LAI calculation") },
+                { assert path(process.out.log.get(0).get(1)).getText().contains("Sorry, LAI is not applicable on the current genome assembly.") },
+                { assert process.out.lai_out == [] },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("stub") {
+
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
+                ]
+                input[1] = []
+                input[2] = []
+                input[3] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/pfr/lai/tests/main.nf.test.snap b/modules/pfr/lai/tests/main.nf.test.snap
new file mode 100644
index 0000000..751ddb6
--- /dev/null
+++ b/modules/pfr/lai/tests/main.nf.test.snap
@@ -0,0 +1,10 @@
+{
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,2ac93e1e6324236af6f9a794bbac2099"
+            ]
+        ],
+        "timestamp": "2023-12-05T12:15:32.969684"
+    }
+}
\ No newline at end of file
diff --git a/modules/pfr/lai/tests/nextflow.config b/modules/pfr/lai/tests/nextflow.config
new file mode 100644
index 0000000..516a3e2
--- /dev/null
+++ b/modules/pfr/lai/tests/nextflow.config
@@ -0,0 +1,10 @@
+process {
+
+    withName: GT_SUFFIXERATOR {
+        ext.args = '-tis -suf -lcp -des -ssp -sds -dna'
+    }
+
+    withName: GT_LTRHARVEST {
+        ext.args = '-minlenltr 100 -maxlenltr 7000 -mintsd 4 -maxtsd 6 -motif TGCA -motifmis 1 -similar 85 -vic 10 -seed 20 -seqids yes'
+    }
+}
diff --git a/modules/pfr/lai/tests/tags.yml b/modules/pfr/lai/tests/tags.yml
new file mode 100644
index 0000000..252295d
--- /dev/null
+++ b/modules/pfr/lai/tests/tags.yml
@@ -0,0 +1,2 @@
+lai:
+  - "modules/pfr/lai/**"
diff --git a/nextflow.config b/nextflow.config
index 135bf29..669b8ca 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -2,8 +2,8 @@ includeConfig './conf/base.config'
 
 params {
     target_assemblies           = [
-        ["red5_v2p1", "/workspace/hrauxr/pangene/.test/red5_v2p1_chr1.fasta"],
-        ["donghong", "/workspace/hrauxr/pangene/.test/donghong.chr1.fsa.gz"]
+        ["red5_v2p1", ".test/red5_v2p1_chr1.fasta"],
+        ["donghong", ".test/donghong.chr1.fsa.gz"]
     ]
     // Pattern:             [ [tag, fasta(.gz) ] ]
     // Permissible tags:    tag, tag_1, tag_tag2_3, tag_tag2_tag3;
@@ -11,7 +11,7 @@ params {
     //                      "." is not allowed in the tag name
     
     te_libraries                = [
-        ["donghong", "/workspace/hrauxr/pangene/.test/donghong.TElib.fa.gz"]
+        ["donghong", ".test/donghong.TElib.fa.gz"]
     ]
     // Pattern:             [ [tag, fasta(.gz) ] ]
     // Optional             Set to null if libraries are not available.
@@ -21,7 +21,7 @@ params {
     // When the TE lib is not available for a traget assembly, EDTA is used to create one.
     
     edta_is_sensitive           = false
-    edta_save_outputs           = false
+    edta_save_te_lib            = true
     
     repeatmasker_save_outputs   = true
     
diff --git a/subworkflows/local/fasta_edta.nf b/subworkflows/local/fasta_edta.nf
deleted file mode 100644
index c47e557..0000000
--- a/subworkflows/local/fasta_edta.nf
+++ /dev/null
@@ -1,43 +0,0 @@
-include { SHORTEN_EDTA_IDS  } from '../../modules/local/edta/shorten_edta_ids'
-include { EDTA              } from '../../modules/local/edta/edta'
-include { RESTORE_EDTA_IDS  } from '../../modules/local/edta/restore_edta_ids'
-
-workflow FASTA_EDTA {
-    take:
-    genome_fasta    // channel: [ meta, fasta ]
-    
-    main:
-    SHORTEN_EDTA_IDS(genome_fasta)
-    .renamed_ids_fasta
-    | EDTA
-
-    RESTORE_EDTA_IDS(
-        EDTA.out.te_lib_fasta,
-        EDTA.out.intact_gff3.map { it[1] },
-        EDTA.out.pass_list.map { it[1] },
-        EDTA.out.out_file.map { it[1] },
-        EDTA.out.te_anno_gff3.map { it[1] },
-        SHORTEN_EDTA_IDS.out.renamed_ids_tsv.map { it[1] }
-    )
-
-    Channel.empty()
-    | mix(
-        SHORTEN_EDTA_IDS.out.versions.first()
-    )
-    | mix(
-        EDTA.out.versions.first()
-    )
-    | mix(
-        RESTORE_EDTA_IDS.out.versions.first()
-    )
-    | set { ch_versions }
-    
-    emit:
-    te_lib_fasta    = RESTORE_EDTA_IDS.out.te_lib_fasta     // channel: [ meta, fasta ]
-    intact_gff3     = RESTORE_EDTA_IDS.out.intact_gff3      // channel: [ meta, gff3 ]
-    pass_list       = RESTORE_EDTA_IDS.out.pass_list        // channel: [ meta, pass.list ]
-    out_file        = RESTORE_EDTA_IDS.out.out_file         // channel: [ meta, out.file ]
-    te_anno_gff3    = RESTORE_EDTA_IDS.out.te_anno_gff3     // channel: [ meta, gff3 ]
-    renamed_ids_tsv = RESTORE_EDTA_IDS.out.renamed_ids_tsv  // channel: [ meta, tsv ]
-    versions        = ch_versions                           // channel: [ versions.yml ]
-}
\ No newline at end of file
diff --git a/subworkflows/local/prepare_assembly.nf b/subworkflows/local/prepare_assembly.nf
index 05bd1ec..db2e3c3 100644
--- a/subworkflows/local/prepare_assembly.nf
+++ b/subworkflows/local/prepare_assembly.nf
@@ -4,6 +4,8 @@ include { FASTAVALIDATOR                        } from '../../modules/nf-core/fa
 include { REPEATMASKER                          } from '../../modules/kherronism/repeatmasker'
 include { STAR_GENOMEGENERATE                   } from '../../modules/nf-core/star/genomegenerate'
 
+include { FASTA_EDTA_LAI                        } from '../../subworkflows/pfr/fasta_edta_lai'
+
 workflow PREPARE_ASSEMBLY {
     take:
     target_assembly     // channel: [ meta, fasta ]
@@ -57,7 +59,7 @@ workflow PREPARE_ASSEMBLY {
     )
     | set { ch_gunzip_te_library }
 
-    // SUBWORKFLOW: FASTA_EDTA
+    // SUBWORKFLOW: FASTA_EDTA_LAI
     ch_validated_target_assembly
     | join(
         ch_gunzip_te_library, remainder: true
@@ -66,12 +68,18 @@ workflow PREPARE_ASSEMBLY {
         teLib == null
     }
     | map { meta, assembly, teLib -> [meta, assembly] }
-    | FASTA_EDTA
+    | set { ch_edta_inputs }
+    
+    FASTA_EDTA_LAI (
+        ch_edta_inputs,
+        [],
+        true // Skip LAI
+    )
     
     // MODULE: REPEATMASKER
     ch_validated_target_assembly
     | join(
-        FASTA_EDTA.out.te_lib_fasta.mix(ch_gunzip_te_library)
+        FASTA_EDTA_LAI.out.te_lib_fasta.mix(ch_gunzip_te_library)
     )
     | set { ch_assembly_n_te_lib }
 
@@ -84,8 +92,7 @@ workflow PREPARE_ASSEMBLY {
     def star_ignore_sjdbgtf = true
     STAR_GENOMEGENERATE(
         ch_validated_target_assembly,
-        ch_validated_target_assembly.map { meta, maskedFasta -> [meta, []] },
-        star_ignore_sjdbgtf
+        ch_validated_target_assembly.map { meta, fasta -> [ [], [] ] }
     )
     .index
     | set { ch_assembly_index }
@@ -93,7 +100,7 @@ workflow PREPARE_ASSEMBLY {
     Channel.empty()
     | mix(FASTAVALIDATOR.out.versions.first())
     | mix(GUNZIP_TE_LIBRARY.out.versions.first())
-    | mix(FASTA_EDTA.out.versions)
+    | mix(FASTA_EDTA_LAI.out.versions)
     | mix(REPEATMASKER.out.versions.first())
     | mix(STAR_GENOMEGENERATE.out.versions.first())
     | mix(GUNZIP_TARGET_ASSEMBLY.out.versions.first())
diff --git a/subworkflows/pfr/fasta_edta_lai/main.nf b/subworkflows/pfr/fasta_edta_lai/main.nf
new file mode 100644
index 0000000..2e73ca5
--- /dev/null
+++ b/subworkflows/pfr/fasta_edta_lai/main.nf
@@ -0,0 +1,88 @@
+include { CUSTOM_SHORTENFASTAIDS    } from '../../../modules/pfr/custom/shortenfastaids'
+include { EDTA_EDTA                 } from '../../../modules/pfr/edta/edta'
+include { LAI                       } from '../../../modules/pfr/lai'
+include { CUSTOM_RESTOREGFFIDS      } from '../../../modules/pfr/custom/restoregffids'
+
+workflow FASTA_EDTA_LAI {
+
+    take:
+    ch_fasta                // channel: [ val(meta), fasta ]
+    ch_monoploid_seqs       // channel: [ val(meta), txt ]; Optional: Set to [] if not needed
+    skip_lai                // val; true|false
+
+    main:
+
+    ch_versions             = Channel.empty()
+
+    // MODULE: CUSTOM_SHORTENFASTAIDS
+    CUSTOM_SHORTENFASTAIDS ( ch_fasta )
+
+    ch_short_ids_fasta      = ch_fasta
+                            | join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true)
+                            | map { meta, fasta, short_ids_fasta ->
+                                [ meta, short_ids_fasta ?: fasta ]
+                            }
+
+    ch_short_ids_tsv        = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv
+    ch_versions             = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first())
+
+    // MODULE: EDTA_EDTA
+    EDTA_EDTA (
+        ch_short_ids_fasta,
+        [],
+        [],
+        [],
+        []
+    )
+
+    ch_te_lib_fasta         = EDTA_EDTA.out.te_lib_fasta
+    ch_pass_list            = EDTA_EDTA.out.pass_list
+    ch_out_file             = EDTA_EDTA.out.out_file
+    ch_te_anno_gff3         = EDTA_EDTA.out.te_anno_gff3
+    ch_versions             = ch_versions.mix(EDTA_EDTA.out.versions.first())
+
+    // MODULE: LAI
+    ch_lai_inputs           = skip_lai
+                            ? Channel.empty()
+                            : ch_short_ids_fasta
+                            | join(ch_pass_list)
+                            | join(ch_out_file)
+                            | join(
+                                ch_monoploid_seqs ?: Channel.empty(),
+                                by:0,
+                                remainder: true
+                            )
+                            | map { meta, fasta, pass, out, mono ->
+                                [ meta, fasta, pass, out, mono ?: [] ]
+                            }
+    LAI (
+        ch_lai_inputs.map { meta, fasta, pass, out, mono -> [ meta, fasta ] },
+        ch_lai_inputs.map { meta, fasta, pass, out, mono -> pass },
+        ch_lai_inputs.map { meta, fasta, pass, out, mono -> out },
+        ch_lai_inputs.map { meta, fasta, pass, out, mono -> mono }
+    )
+
+    ch_lai_log              = LAI.out.log
+    ch_lai_out              = LAI.out.lai_out
+    ch_versions             = ch_versions.mix(LAI.out.versions.first())
+
+    // MODULE: CUSTOM_RESTOREGFFIDS
+    ch_restorable_gff_tsv   = ch_te_anno_gff3.join(ch_short_ids_tsv)
+
+    CUSTOM_RESTOREGFFIDS (
+        ch_restorable_gff_tsv.map { meta, gff, tsv -> [ meta, gff ] },
+        ch_restorable_gff_tsv.map { meta, gff, tsv -> tsv }
+    )
+
+    ch_restored_gff         = ch_te_anno_gff3
+                            | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true)
+                            | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] }
+    ch_versions             = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first())
+
+    emit:
+    te_lib_fasta            = ch_te_lib_fasta   // channel: [ val(meta), fasta ]
+    te_anno_gff3            = ch_restored_gff   // channel: [ val(meta), gff ]
+    lai_log                 = ch_lai_log        // channel: [ val(meta), log ]
+    lai_out                 = ch_lai_out        // channel: [ val(meta), out ]
+    versions                = ch_versions       // channel: [ versions.yml ]
+}
diff --git a/subworkflows/pfr/fasta_edta_lai/meta.yml b/subworkflows/pfr/fasta_edta_lai/meta.yml
new file mode 100644
index 0000000..52483ce
--- /dev/null
+++ b/subworkflows/pfr/fasta_edta_lai/meta.yml
@@ -0,0 +1,69 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "fasta_edta_lai"
+description: |
+  Performs extensive de-novo transposable element annotation with EDTA and optionally estimates repeat-space completeness with LAI
+keywords:
+  - genomics
+  - genome
+  - annotation
+  - repeat
+  - transposons
+  - stats
+  - qc
+components:
+  - custom/restoregffids
+  - custom/shortenfastaids
+  - edta/edta
+  - lai
+input:
+  - ch_fasta:
+      type: file
+      description: |
+        Channel for the assembly fasta file
+        Structure: [ val(meta), path(fasta) ]
+      pattern: "*.{fsa,fa,fasta}"
+  - ch_monoploid_seqs:
+      type: file
+      description: |
+        Channel for providing a list of monoploid sequences
+        for correct estimation of LAI for polyploid genomes.
+        This parameter is useful when all the haplotypes are
+        stored in a single fasta file.
+        Structure: [ val(meta), path(txt) ]
+      pattern: "*.txt"
+  - skip_lai:
+      type: boolean
+      description: |
+        Skip LAI estimation
+        Structure: [ val(boolean) ]
+output:
+  - te_lib_fasta:
+      type: file
+      description: A non-redundant TE library in fasta format
+      pattern: "*.EDTA.TElib.fa"
+  - te_anno_gff3:
+      type: file
+      description: A gff3 file containing both structurally intact and fragmented TE annotations
+      pattern: "*.EDTA.TEanno.gff3"
+  - lai_log:
+      type: file
+      description: |
+        Log from LAI
+        Structure: [ val(meta), path(log) ]
+      pattern: "*.LAI.log"
+  - lai_out:
+      type: file
+      description: |
+        LAI output
+        Structure: [ val(meta), path(out) ]
+      pattern: "*.LAI.out"
+  - versions:
+      type: file
+      description: |
+        File containing software versions
+        Structure: [ path(versions.yml) ]
+      pattern: "versions.yml"
+authors:
+  - "@GallVp"
+maintainers:
+  - "@GallVp"
diff --git a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
new file mode 100644
index 0000000..a4fa87b
--- /dev/null
+++ b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
@@ -0,0 +1,38 @@
+nextflow_workflow {
+
+    name "Test Workflow FASTA_EDTA_LAI"
+    script "../main.nf"
+    workflow "FASTA_EDTA_LAI"
+
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/fasta_edta_lai"
+    tag "fasta_edta_lai"
+    tag "lai"
+    tag "edta/edta"
+    tag "custom/restoregffids"
+    tag "custom/shortenfastaids"
+
+    test("test_data") {
+
+        when {
+            workflow {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ],
+                    file("/Users/hrauxr/Projects/nxf-modules/data/chr1.fa", checkIfExists: true)
+                ])
+                input[1] = []
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(workflow.out.versions).match("versions") }
+            )
+        }
+    }
+}
diff --git a/subworkflows/pfr/fasta_edta_lai/tests/tags.yml b/subworkflows/pfr/fasta_edta_lai/tests/tags.yml
new file mode 100644
index 0000000..b114c58
--- /dev/null
+++ b/subworkflows/pfr/fasta_edta_lai/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/fasta_edta_lai:
+  - subworkflows/pfr/fasta_edta_lai/**
diff --git a/workflows/pangene.nf b/workflows/pangene.nf
index 9110688..6241ab7 100644
--- a/workflows/pangene.nf
+++ b/workflows/pangene.nf
@@ -1,15 +1,15 @@
 include { validateParams                } from '../modules/local/validate_params'
 
 include { PREPARE_ASSEMBLY              } from '../subworkflows/local/prepare_assembly'
-include { PREPROCESS_RNASEQ             } from '../subworkflows/local/preprocess_rnaseq'
-include { ALIGN_RNASEQ                  } from '../subworkflows/local/align_rnaseq'
-include { PREPARE_EXT_PROTS             } from '../subworkflows/local/prepare_ext_prots'
+// include { PREPROCESS_RNASEQ             } from '../subworkflows/local/preprocess_rnaseq'
+// include { ALIGN_RNASEQ                  } from '../subworkflows/local/align_rnaseq'
+// include { PREPARE_EXT_PROTS             } from '../subworkflows/local/prepare_ext_prots'
 
-include { BRAKER3                       } from '../modules/kherronism/braker3'
+// include { BRAKER3                       } from '../modules/kherronism/braker3'
 
-include { FASTA_LIFTOFF                 } from '../subworkflows/local/fasta_liftoff'
+// include { FASTA_LIFTOFF                 } from '../subworkflows/local/fasta_liftoff'
 
-include { CUSTOM_DUMPSOFTWAREVERSIONS   } from '../modules/nf-core/custom/dumpsoftwareversions'
+// include { CUSTOM_DUMPSOFTWAREVERSIONS   } from '../modules/nf-core/custom/dumpsoftwareversions'
 
 validateParams(params)
 
@@ -46,26 +46,26 @@ workflow PANGENE {
                                 | collect
                                 : Channel.empty()
 
-    ch_ext_prot_fastas          = params.external_protein_fastas
-                                ? Channel.fromList(params.external_protein_fastas)
-                                | map { filePath ->
-                                    def fileHandle = file(filePath, checkIfExists: true)
-                                    [[id:fileHandle.getSimpleName()], fileHandle]
-                                }
-                                : Channel.empty()
+    // ch_ext_prot_fastas          = params.external_protein_fastas
+    //                             ? Channel.fromList(params.external_protein_fastas)
+    //                             | map { filePath ->
+    //                                 def fileHandle = file(filePath, checkIfExists: true)
+    //                                 [[id:fileHandle.getSimpleName()], fileHandle]
+    //                             }
+    //                             : Channel.empty()
     
-    ch_xref_annotations_mm      = params.liftoff_xref_annotations
-                                ? Channel.fromList(params.liftoff_xref_annotations)
-                                | multiMap { fasta, gff ->
-                                    def fastaFile = file(fasta, checkIfExists:true)
+    // ch_xref_annotations_mm      = params.liftoff_xref_annotations
+    //                             ? Channel.fromList(params.liftoff_xref_annotations)
+    //                             | multiMap { fasta, gff ->
+    //                                 def fastaFile = file(fasta, checkIfExists:true)
 
-                                    fasta: [[id:fastaFile.getSimpleName()], fastaFile]
-                                    gff: [[id:fastaFile.getSimpleName()], file(gff, checkIfExists:true)]
-                                }
-                                : Channel.empty()
+    //                                 fasta: [[id:fastaFile.getSimpleName()], fastaFile]
+    //                                 gff: [[id:fastaFile.getSimpleName()], file(gff, checkIfExists:true)]
+    //                             }
+    //                             : Channel.empty()
 
-    ch_xref_annotations_fasta   = ch_xref_annotations_mm.fasta
-    ch_xref_annotations_gff     = ch_xref_annotations_mm.gff
+    // ch_xref_annotations_fasta   = ch_xref_annotations_mm.fasta
+    // ch_xref_annotations_gff     = ch_xref_annotations_mm.gff
 
     // SUBWORKFLOW: PREPARE_ASSEMBLY
     PREPARE_ASSEMBLY(
@@ -78,76 +78,76 @@ workflow PANGENE {
     ch_target_assemby_index     = PREPARE_ASSEMBLY.out.target_assemby_index
     ch_versions                 = ch_versions.mix(PREPARE_ASSEMBLY.out.versions)
 
-    // SUBWORKFLOW: PREPROCESS_RNASEQ
-    PREPROCESS_RNASEQ(
-        ch_samplesheet,
-        ch_tar_assm_str,
-        params.skip_fastqc,
-        params.skip_fastp,
-        params.save_trimmed,
-        params.min_trimmed_reads,
-        params.remove_ribo_rna,
-        ch_sortmerna_fastas
-    )
-
-    ch_trim_reads               = PREPROCESS_RNASEQ.out.trim_reads
-    ch_reads_target             = PREPROCESS_RNASEQ.out.reads_target
-    ch_versions                 = ch_versions.mix(PREPROCESS_RNASEQ.out.versions)
-
-    // SUBWORKFLOW: ALIGN_RNASEQ
-    ALIGN_RNASEQ(
-        ch_reads_target,
-        ch_trim_reads,
-        ch_target_assemby_index
-    )
-
-    ch_rnaseq_bam               = ALIGN_RNASEQ.out.bam
-    ch_versions                 = ch_versions.mix(ALIGN_RNASEQ.out.versions)
-
-    // MODULE: PREPARE_EXT_PROTS
-    PREPARE_EXT_PROTS(
-        ch_ext_prot_fastas
-    )
-
-    ch_ext_prots_fasta          = PREPARE_EXT_PROTS.out.ext_prots_fasta
-    ch_versions                 = ch_versions.mix(PREPARE_EXT_PROTS.out.versions)
-
-    // MODULE: BRAKER3
-    ch_braker_inputs            = ch_masked_target_assembly
-                                | join(ch_rnaseq_bam, remainder: true)
-                                | combine(
-                                    ch_ext_prots_fasta.map { meta, filePath -> filePath }.ifEmpty(null)
-                                )
-                                | map { meta, fasta, bam, prots -> [meta, fasta, bam ?: [], prots ?: []] }
+    // // SUBWORKFLOW: PREPROCESS_RNASEQ
+    // PREPROCESS_RNASEQ(
+    //     ch_samplesheet,
+    //     ch_tar_assm_str,
+    //     params.skip_fastqc,
+    //     params.skip_fastp,
+    //     params.save_trimmed,
+    //     params.min_trimmed_reads,
+    //     params.remove_ribo_rna,
+    //     ch_sortmerna_fastas
+    // )
+
+    // ch_trim_reads               = PREPROCESS_RNASEQ.out.trim_reads
+    // ch_reads_target             = PREPROCESS_RNASEQ.out.reads_target
+    // ch_versions                 = ch_versions.mix(PREPROCESS_RNASEQ.out.versions)
+
+    // // SUBWORKFLOW: ALIGN_RNASEQ
+    // ALIGN_RNASEQ(
+    //     ch_reads_target,
+    //     ch_trim_reads,
+    //     ch_target_assemby_index
+    // )
+
+    // ch_rnaseq_bam               = ALIGN_RNASEQ.out.bam
+    // ch_versions                 = ch_versions.mix(ALIGN_RNASEQ.out.versions)
+
+    // // MODULE: PREPARE_EXT_PROTS
+    // PREPARE_EXT_PROTS(
+    //     ch_ext_prot_fastas
+    // )
+
+    // ch_ext_prots_fasta          = PREPARE_EXT_PROTS.out.ext_prots_fasta
+    // ch_versions                 = ch_versions.mix(PREPARE_EXT_PROTS.out.versions)
+
+    // // MODULE: BRAKER3
+    // ch_braker_inputs            = ch_masked_target_assembly
+    //                             | join(ch_rnaseq_bam, remainder: true)
+    //                             | combine(
+    //                                 ch_ext_prots_fasta.map { meta, filePath -> filePath }.ifEmpty(null)
+    //                             )
+    //                             | map { meta, fasta, bam, prots -> [meta, fasta, bam ?: [], prots ?: []] }
     
-    def rnaseq_sets_dirs        = []
-    def rnaseq_sets_ids         = []
-    def hintsfile               = []
-
-    BRAKER3(
-        ch_braker_inputs.map { meta, fasta, bam, prots -> [meta, fasta] },
-        ch_braker_inputs.map { meta, fasta, bam, prots -> bam },
-        rnaseq_sets_dirs,
-        rnaseq_sets_ids,
-        ch_braker_inputs.map { meta, fasta, bam, prots -> prots },
-        hintsfile
-    )
-
-    ch_braker_gff3              = BRAKER3.out.gff3
-    ch_versions                 = ch_versions.mix(BRAKER3.out.versions.first())
-
-    // SUBWORKFLOW: FASTA_LIFTOFF
-    FASTA_LIFTOFF(
-        ch_valid_target_assembly,
-        ch_xref_annotations_fasta,
-        ch_xref_annotations_gff
-    )
-
-    ch_liftoff_gff3             = FASTA_LIFTOFF.out.gff3
-    ch_versions                 = ch_versions.mix(FASTA_LIFTOFF.out.versions)
-
-    // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS
-    CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    )
+    // def rnaseq_sets_dirs        = []
+    // def rnaseq_sets_ids         = []
+    // def hintsfile               = []
+
+    // BRAKER3(
+    //     ch_braker_inputs.map { meta, fasta, bam, prots -> [meta, fasta] },
+    //     ch_braker_inputs.map { meta, fasta, bam, prots -> bam },
+    //     rnaseq_sets_dirs,
+    //     rnaseq_sets_ids,
+    //     ch_braker_inputs.map { meta, fasta, bam, prots -> prots },
+    //     hintsfile
+    // )
+
+    // ch_braker_gff3              = BRAKER3.out.gff3
+    // ch_versions                 = ch_versions.mix(BRAKER3.out.versions.first())
+
+    // // SUBWORKFLOW: FASTA_LIFTOFF
+    // FASTA_LIFTOFF(
+    //     ch_valid_target_assembly,
+    //     ch_xref_annotations_fasta,
+    //     ch_xref_annotations_gff
+    // )
+
+    // ch_liftoff_gff3             = FASTA_LIFTOFF.out.gff3
+    // ch_versions                 = ch_versions.mix(FASTA_LIFTOFF.out.versions)
+
+    // // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS
+    // CUSTOM_DUMPSOFTWAREVERSIONS (
+    //     ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    // )
 }
\ No newline at end of file

From 2de0d224d0b2720e780f9cb7c179c8e8bad347f5 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 20 Dec 2023 14:23:55 +1300
Subject: [PATCH 30/59] Trying to add FASTQ_FASTQC_UMITOOLS_FASTP

---
 nextflow.config      |  2 +-
 workflows/pangene.nf | 30 +++++++++++++++---------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 669b8ca..d861331 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -25,7 +25,7 @@ params {
     
     repeatmasker_save_outputs   = true
     
-    samplesheet                 = "./.test/samplesheet.csv"
+    samplesheet                 = "./.test/samplesheet_small.csv"
     // Optional: Set to null if not available
 
     skip_fastqc                 = false
diff --git a/workflows/pangene.nf b/workflows/pangene.nf
index 6241ab7..82fdbe4 100644
--- a/workflows/pangene.nf
+++ b/workflows/pangene.nf
@@ -1,7 +1,7 @@
 include { validateParams                } from '../modules/local/validate_params'
 
 include { PREPARE_ASSEMBLY              } from '../subworkflows/local/prepare_assembly'
-// include { PREPROCESS_RNASEQ             } from '../subworkflows/local/preprocess_rnaseq'
+include { PREPROCESS_RNASEQ             } from '../subworkflows/local/preprocess_rnaseq'
 // include { ALIGN_RNASEQ                  } from '../subworkflows/local/align_rnaseq'
 // include { PREPARE_EXT_PROTS             } from '../subworkflows/local/prepare_ext_prots'
 
@@ -78,21 +78,21 @@ workflow PANGENE {
     ch_target_assemby_index     = PREPARE_ASSEMBLY.out.target_assemby_index
     ch_versions                 = ch_versions.mix(PREPARE_ASSEMBLY.out.versions)
 
-    // // SUBWORKFLOW: PREPROCESS_RNASEQ
-    // PREPROCESS_RNASEQ(
-    //     ch_samplesheet,
-    //     ch_tar_assm_str,
-    //     params.skip_fastqc,
-    //     params.skip_fastp,
-    //     params.save_trimmed,
-    //     params.min_trimmed_reads,
-    //     params.remove_ribo_rna,
-    //     ch_sortmerna_fastas
-    // )
+    // SUBWORKFLOW: PREPROCESS_RNASEQ
+    PREPROCESS_RNASEQ(
+        ch_samplesheet,
+        ch_tar_assm_str,
+        params.skip_fastqc,
+        params.skip_fastp,
+        params.save_trimmed,
+        params.min_trimmed_reads,
+        params.remove_ribo_rna,
+        ch_sortmerna_fastas
+    )
 
-    // ch_trim_reads               = PREPROCESS_RNASEQ.out.trim_reads
-    // ch_reads_target             = PREPROCESS_RNASEQ.out.reads_target
-    // ch_versions                 = ch_versions.mix(PREPROCESS_RNASEQ.out.versions)
+    ch_trim_reads               = PREPROCESS_RNASEQ.out.trim_reads
+    ch_reads_target             = PREPROCESS_RNASEQ.out.reads_target
+    ch_versions                 = ch_versions.mix(PREPROCESS_RNASEQ.out.versions)
 
     // // SUBWORKFLOW: ALIGN_RNASEQ
     // ALIGN_RNASEQ(

From 48e72710950ff5e4b4201a27b2331f64d50e4bbe Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 21 Dec 2023 10:15:14 +1300
Subject: [PATCH 31/59] Updated modules and applied prettier

---
 .nf-core.yml                                  |   2 +-
 README.md                                     |  12 +-
 TODO.md                                       |   4 +-
 modules.json                                  | 327 ++++++++----------
 modules/kherronism/braker3/meta.yml           |   6 +-
 modules/kherronism/repeatmasker/meta.yml      |   7 +-
 .../genomegenerate/star-genomegenerate.diff   | 247 -------------
 7 files changed, 155 insertions(+), 450 deletions(-)
 delete mode 100644 modules/nf-core/star/genomegenerate/star-genomegenerate.diff

diff --git a/.nf-core.yml b/.nf-core.yml
index b1a7f0e..3805dc8 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -1 +1 @@
-repository_type: pipeline
\ No newline at end of file
+repository_type: pipeline
diff --git a/README.md b/README.md
index ea8b609..8efbcf0 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
 # PANGENE
+
 A NextFlow pipeline for pan-genome annotation.
 
 ## Pipeline Flowchart
@@ -12,7 +13,7 @@ flowchart TD
     EDTA
     REPEATMASKER
     end
-    
+
     TARGET_ASSEMBLIES(["[target_assemblies]"])
     TE_LIBRARIES(["[te_libs]"])
     TARGET_ASSEMBLIES --> FASTA_VALIDATE
@@ -30,7 +31,7 @@ flowchart TD
     STAR
     SAMTOOLS_CAT
     end
-    
+
     SAMPLESHEET([samplesheet])
     SAMPLESHEET --> |Tech. reps|CAT_FASTQ
     CAT_FASTQ --> FASTQC
@@ -60,7 +61,7 @@ flowchart TD
     XREF_ANNOTATIONS --> |xref_fasta|LIFTOFF
     GFFREAD --> LIFTOFF
     anno_fasta --> |Fasta|LIFTOFF
-    
+
     EXTERNAL_PROTEIN_SEQS --> CAT
     anno_masked_fasta --> |Masked fasta|BRAKER3
     anno_bam --> |RNASeq bam|BRAKER3
@@ -76,10 +77,9 @@ flowchart TD
 Configure the pipeline by modifying `nextflow.config` and submit to SLURM for execution.
 
 ```bash
-sbatch ./pan_gene_pfr.sh 
+sbatch ./pan_gene_pfr.sh
 ```
 
-
 ## Third-party Sources
 
 Some software components of this pipeline have been adopted from following third-party sources:
@@ -94,4 +94,4 @@ Some software components of this pipeline have been adopted from following third
 
 2. nf-core/rnaseq [MIT](https://github.com/nf-core/rnaseq/blob/master/LICENSE): https://github.com/nf-core/rnaseq
 3. rewarewaannotation [MIT](https://github.com/kherronism/rewarewaannotation/blob/master/LICENSE): https://github.com/kherronism/rewarewaannotation
-4. assembly_qc [GPL-3.0](https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE): https://github.com/Plant-Food-Research-Open/assembly_qc
\ No newline at end of file
+4. assembly_qc [GPL-3.0](https://github.com/Plant-Food-Research-Open/assembly_qc/blob/main/LICENSE): https://github.com/Plant-Food-Research-Open/assembly_qc
diff --git a/TODO.md b/TODO.md
index 0134c26..94f51c1 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,7 +2,7 @@
 - [ ] From Ross regarding post-processing:
 
 > [9:49 am] Ross Crowhurst
-Here is an easy one: BLATSp vs swissprot & Arabidpsis and check query is with set thresholds of reference - if so accept; If not move to BLASTp vs Uniref90 or Refeq (or some other predetermined model species) - same deal accept if within threshold limits. Else BLASTn of cds vs NCBI nt (really scrapping the bottom of the barrel here). If not a hit to anything then chances are its garbage and should be removed. Some ppl might try to claim its a unique protein to the genotype but in 20 years I have never seen one of those be supported - mostly this category is garbage. The screen agains NCBI nt also assists to classify "bits" as well retroposonss etc. Idea being you want to remove garbage predictions - as this does take time you can see why some papers just filter out by size.
+> Here is an easy one: BLATSp vs swissprot & Arabidpsis and check query is with set thresholds of reference - if so accept; If not move to BLASTp vs Uniref90 or Refeq (or some other predetermined model species) - same deal accept if within threshold limits. Else BLASTn of cds vs NCBI nt (really scrapping the bottom of the barrel here). If not a hit to anything then chances are its garbage and should be removed. Some ppl might try to claim its a unique protein to the genotype but in 20 years I have never seen one of those be supported - mostly this category is garbage. The screen agains NCBI nt also assists to classify "bits" as well retroposonss etc. Idea being you want to remove garbage predictions - as this does take time you can see why some papers just filter out by size.
 
 - [ ] From Cecilia:
 
@@ -12,4 +12,4 @@ Here is an easy one: BLATSp vs swissprot & Arabidpsis and check query is with se
 
 > https://www.biorxiv.org/content/10.1101/096529v2.full.pdf
 
-> Don't use `-exclude_partial`
\ No newline at end of file
+> Don't use `-exclude_partial`
diff --git a/modules.json b/modules.json
index b57ef90..dfc1e93 100644
--- a/modules.json
+++ b/modules.json
@@ -1,191 +1,142 @@
 {
-    "name": "PlantandFoodResearch/pangene",
-    "homePage": "https://github.com/PlantandFoodResearch/pangene",
-    "repos": {
-        "git@github.com:PlantandFoodResearch/nxf-modules.git": {
-            "modules": {
-                "pfr": {
-                    "custom/restoregffids": {
-                        "branch": "main",
-                        "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
-                        "installed_by": [
-                            "fasta_edta_lai",
-                            "modules"
-                        ]
-                    },
-                    "custom/shortenfastaids": {
-                        "branch": "main",
-                        "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
-                        "installed_by": [
-                            "fasta_edta_lai",
-                            "modules"
-                        ]
-                    },
-                    "edta/edta": {
-                        "branch": "main",
-                        "git_sha": "d14b1f1d790cc01c11f8ec5aa80a9562c6808b20",
-                        "installed_by": [
-                            "fasta_edta_lai",
-                            "modules"
-                        ]
-                    },
-                    "lai": {
-                        "branch": "main",
-                        "git_sha": "7e6e3cb41362a045c6bb6065903efa0eba246e87",
-                        "installed_by": [
-                            "fasta_edta_lai"
-                        ]
-                    },
-                    "liftoff": {
-                        "branch": "main",
-                        "git_sha": "3593ec100c92b656204bf739a51d62fd44d81f6f",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    }
-                }
-            },
-            "subworkflows": {
-                "pfr": {
-                    "fasta_edta_lai": {
-                        "branch": "main",
-                        "git_sha": "fde2f37998ba54ec9c0b5cf65a2f28f14af981b0",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
-                    }
-                }
-            }
-        },
-        "git@github.com:kherronism/nf-modules.git": {
-            "modules": {
-                "kherronism": {
-                    "braker3": {
-                        "branch": "dev",
-                        "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "repeatmasker": {
-                        "branch": "dev",
-                        "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    }
-                }
-            }
-        },
-        "https://github.com/nf-core/modules.git": {
-            "modules": {
-                "nf-core": {
-                    "cat/cat": {
-                        "branch": "master",
-                        "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "cat/fastq": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "custom/dumpsoftwareversions": {
-                        "branch": "master",
-                        "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "fastavalidator": {
-                        "branch": "master",
-                        "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "fastp": {
-                        "branch": "master",
-                        "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde",
-                        "installed_by": [
-                            "fastq_fastqc_umitools_fastp"
-                        ]
-                    },
-                    "fastqc": {
-                        "branch": "master",
-                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
-                        "installed_by": [
-                            "fastq_fastqc_umitools_fastp",
-                            "modules"
-                        ]
-                    },
-                    "gffread": {
-                        "branch": "master",
-                        "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "gunzip": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "samtools/cat": {
-                        "branch": "master",
-                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "sortmerna": {
-                        "branch": "master",
-                        "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "star/align": {
-                        "branch": "master",
-                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "star/genomegenerate": {
-                        "branch": "master",
-                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "modules"
-                        ],
-                        "patch": "modules/nf-core/star/genomegenerate/star-genomegenerate.diff"
-                    },
-                    "umitools/extract": {
-                        "branch": "master",
-                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
-                        "installed_by": [
-                            "fastq_fastqc_umitools_fastp"
-                        ]
-                    }
-                }
-            },
-            "subworkflows": {
-                "nf-core": {
-                    "fastq_fastqc_umitools_fastp": {
-                        "branch": "master",
-                        "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
-                    }
-                }
-            }
+  "name": "PlantandFoodResearch/pangene",
+  "homePage": "https://github.com/PlantandFoodResearch/pangene",
+  "repos": {
+    "git@github.com:PlantandFoodResearch/nxf-modules.git": {
+      "modules": {
+        "pfr": {
+          "custom/restoregffids": {
+            "branch": "main",
+            "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
+            "installed_by": ["fasta_edta_lai", "modules"]
+          },
+          "custom/shortenfastaids": {
+            "branch": "main",
+            "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
+            "installed_by": ["fasta_edta_lai", "modules"]
+          },
+          "edta/edta": {
+            "branch": "main",
+            "git_sha": "d14b1f1d790cc01c11f8ec5aa80a9562c6808b20",
+            "installed_by": ["fasta_edta_lai", "modules"]
+          },
+          "lai": {
+            "branch": "main",
+            "git_sha": "7e6e3cb41362a045c6bb6065903efa0eba246e87",
+            "installed_by": ["fasta_edta_lai"]
+          },
+          "liftoff": {
+            "branch": "main",
+            "git_sha": "3593ec100c92b656204bf739a51d62fd44d81f6f",
+            "installed_by": ["modules"]
+          }
         }
+      },
+      "subworkflows": {
+        "pfr": {
+          "fasta_edta_lai": {
+            "branch": "main",
+            "git_sha": "fde2f37998ba54ec9c0b5cf65a2f28f14af981b0",
+            "installed_by": ["subworkflows"]
+          }
+        }
+      }
+    },
+    "git@github.com:kherronism/nf-modules.git": {
+      "modules": {
+        "kherronism": {
+          "braker3": {
+            "branch": "dev",
+            "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
+            "installed_by": ["modules"]
+          },
+          "repeatmasker": {
+            "branch": "dev",
+            "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
+            "installed_by": ["modules"]
+          }
+        }
+      }
+    },
+    "https://github.com/nf-core/modules.git": {
+      "modules": {
+        "nf-core": {
+          "cat/cat": {
+            "branch": "master",
+            "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
+            "installed_by": ["modules"]
+          },
+          "cat/fastq": {
+            "branch": "master",
+            "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+            "installed_by": ["modules"]
+          },
+          "custom/dumpsoftwareversions": {
+            "branch": "master",
+            "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
+            "installed_by": ["modules"]
+          },
+          "fastavalidator": {
+            "branch": "master",
+            "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382",
+            "installed_by": ["modules"]
+          },
+          "fastp": {
+            "branch": "master",
+            "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde",
+            "installed_by": ["fastq_fastqc_umitools_fastp"]
+          },
+          "fastqc": {
+            "branch": "master",
+            "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
+            "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
+          },
+          "gffread": {
+            "branch": "master",
+            "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8",
+            "installed_by": ["modules"]
+          },
+          "gunzip": {
+            "branch": "master",
+            "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+            "installed_by": ["modules"]
+          },
+          "samtools/cat": {
+            "branch": "master",
+            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+            "installed_by": ["modules"]
+          },
+          "sortmerna": {
+            "branch": "master",
+            "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
+            "installed_by": ["modules"]
+          },
+          "star/align": {
+            "branch": "master",
+            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+            "installed_by": ["modules"]
+          },
+          "star/genomegenerate": {
+            "branch": "master",
+            "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a",
+            "installed_by": ["modules"]
+          },
+          "umitools/extract": {
+            "branch": "master",
+            "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
+            "installed_by": ["fastq_fastqc_umitools_fastp"]
+          }
+        }
+      },
+      "subworkflows": {
+        "nf-core": {
+          "fastq_fastqc_umitools_fastp": {
+            "branch": "master",
+            "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
+            "installed_by": ["subworkflows"]
+          }
+        }
+      }
     }
-}
\ No newline at end of file
+  }
+}
diff --git a/modules/kherronism/braker3/meta.yml b/modules/kherronism/braker3/meta.yml
index 9bc13a3..ed4da40 100644
--- a/modules/kherronism/braker3/meta.yml
+++ b/modules/kherronism/braker3/meta.yml
@@ -1,6 +1,6 @@
 name: braker3
 description: |
-    Gene prediction in novel genomes using RNA-seq and protein homology information
+  Gene prediction in novel genomes using RNA-seq and protein homology information
 keywords:
   - genome
   - annotation
@@ -8,8 +8,8 @@ keywords:
 tools:
   - braker3:
       description: "BRAKER3 is a pipeline for fully automated prediction of
-          protein coding gene structures using protein and RNA-seq and protein homology
-          information"
+        protein coding gene structures using protein and RNA-seq and protein homology
+        information"
       homepage: "https://github.com/Gaius-Augustus/BRAKER"
       documentation: "https://github.com/Gaius-Augustus/BRAKER"
       tool_dev_url: "https://github.com/Gaius-Augustus/BRAKER"
diff --git a/modules/kherronism/repeatmasker/meta.yml b/modules/kherronism/repeatmasker/meta.yml
index 8adeb55..0cab608 100644
--- a/modules/kherronism/repeatmasker/meta.yml
+++ b/modules/kherronism/repeatmasker/meta.yml
@@ -1,6 +1,6 @@
 name: repeatmasker
 description: |
-  Screening DNA sequences for interspersed repeats and low complexity DNA sequences.
+  Screening DNA sequences for interspersed repeats and low complexity DNA sequences
 
 keywords:
   - genome
@@ -9,8 +9,9 @@ keywords:
 
 tools:
   - repeatmasker:
-      description: "RepeatMasker is a program that screens DNA sequences for interspersed
-      repeats and low complexity DNA sequences."
+      description: |
+        RepeatMasker is a program that screens DNA sequences for interspersed
+        repeats and low complexity DNA sequences
       homepage: "https://www.repeatmasker.org/"
       documentation: "https://www.repeatmasker.org/webrepeatmaskerhelp.html"
       tool_dev_url: "https://github.com/rmhubley/RepeatMasker"
diff --git a/modules/nf-core/star/genomegenerate/star-genomegenerate.diff b/modules/nf-core/star/genomegenerate/star-genomegenerate.diff
deleted file mode 100644
index 0181f46..0000000
--- a/modules/nf-core/star/genomegenerate/star-genomegenerate.diff
+++ /dev/null
@@ -1,247 +0,0 @@
-Changes in module 'nf-core/star/genomegenerate'
---- modules/nf-core/star/genomegenerate/environment.yml
-+++ modules/nf-core/star/genomegenerate/environment.yml
-@@ -1,9 +1,11 @@
- name: star_genomegenerate
-+
- channels:
-   - conda-forge
-   - bioconda
-   - defaults
-+
- dependencies:
-+  - bioconda::samtools=1.18
-   - bioconda::star=2.7.10a
--  - bioconda::samtools=1.18
-   - conda-forge::gawk=5.1.0
-
---- modules/nf-core/star/genomegenerate/main.nf
-+++ modules/nf-core/star/genomegenerate/main.nf
-@@ -19,9 +19,10 @@
-     task.ext.when == null || task.ext.when
- 
-     script:
--    def args = task.ext.args ?: ''
--    def args_list = args.tokenize()
--    def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
-+    def args        = task.ext.args ?: ''
-+    def args_list   = args.tokenize()
-+    def memory      = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
-+    def include_gtf = gtf ? "--sjdbGTFfile $gtf" : ''
-     if (args_list.contains('--genomeSAindexNbases')) {
-         """
-         mkdir star
-@@ -29,7 +30,7 @@
-             --runMode genomeGenerate \\
-             --genomeDir star/ \\
-             --genomeFastaFiles $fasta \\
--            --sjdbGTFfile $gtf \\
-+            $include_gtf \\
-             --runThreadN $task.cpus \\
-             $memory \\
-             $args
-@@ -51,7 +52,7 @@
-             --runMode genomeGenerate \\
-             --genomeDir star/ \\
-             --genomeFastaFiles $fasta \\
--            --sjdbGTFfile $gtf \\
-+            $include_gtf \\
-             --runThreadN $task.cpus \\
-             --genomeSAindexNbases \$NUM_BASES \\
-             $memory \\
-@@ -67,30 +68,52 @@
-     }
- 
-     stub:
--    """
--    mkdir star
--    touch star/Genome
--    touch star/Log.out
--    touch star/SA
--    touch star/SAindex
--    touch star/chrLength.txt
--    touch star/chrName.txt
--    touch star/chrNameLength.txt
--    touch star/chrStart.txt
--    touch star/exonGeTrInfo.tab
--    touch star/exonInfo.tab
--    touch star/geneInfo.tab
--    touch star/genomeParameters.txt
--    touch star/sjdbInfo.txt
--    touch star/sjdbList.fromGTF.out.tab
--    touch star/sjdbList.out.tab
--    touch star/transcriptInfo.tab
-+    if (gtf) {
-+        """
-+        mkdir star
-+        touch star/Genome
-+        touch star/Log.out
-+        touch star/SA
-+        touch star/SAindex
-+        touch star/chrLength.txt
-+        touch star/chrName.txt
-+        touch star/chrNameLength.txt
-+        touch star/chrStart.txt
-+        touch star/exonGeTrInfo.tab
-+        touch star/exonInfo.tab
-+        touch star/geneInfo.tab
-+        touch star/genomeParameters.txt
-+        touch star/sjdbInfo.txt
-+        touch star/sjdbList.fromGTF.out.tab
-+        touch star/sjdbList.out.tab
-+        touch star/transcriptInfo.tab
- 
--    cat <<-END_VERSIONS > versions.yml
--    "${task.process}":
--        star: \$(STAR --version | sed -e "s/STAR_//g")
--        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
--        gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
--    END_VERSIONS
--    """
-+        cat <<-END_VERSIONS > versions.yml
-+        "${task.process}":
-+            star: \$(STAR --version | sed -e "s/STAR_//g")
-+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
-+            gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
-+        END_VERSIONS
-+        """
-+    } else {
-+        """
-+        mkdir star
-+        touch star/Genome
-+        touch star/Log.out
-+        touch star/SA
-+        touch star/SAindex
-+        touch star/chrLength.txt
-+        touch star/chrName.txt
-+        touch star/chrNameLength.txt
-+        touch star/chrStart.txt
-+        touch star/genomeParameters.txt
-+
-+        cat <<-END_VERSIONS > versions.yml
-+        "${task.process}":
-+            star: \$(STAR --version | sed -e "s/STAR_//g")
-+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
-+            gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
-+        END_VERSIONS
-+        """
-+    }
- }
-
---- modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
-+++ modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
-@@ -5,12 +5,18 @@
-                 "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
-             ]
-         ],
--        "timestamp": "2023-12-04T18:01:27.298248806"
-+        "timestamp": "2023-12-19T11:05:51.741109"
-     },
--    "index": {
-+    "index_with_gtf": {
-         "content": [
--            "star"
-+            "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]"
-         ],
--        "timestamp": "2023-11-23T11:31:47.560528"
-+        "timestamp": "2023-12-19T11:38:14.551548"
-+    },
-+    "index_without_gtf": {
-+        "content": [
-+            "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]"
-+        ],
-+        "timestamp": "2023-12-19T11:38:22.382905"
-     }
- }
---- modules/nf-core/star/genomegenerate/tests/main.nf.test
-+++ modules/nf-core/star/genomegenerate/tests/main.nf.test
-@@ -28,7 +28,86 @@
-         then {
-             assertAll(
-                 { assert process.success },
--                { assert snapshot(file(process.out.index[0][1]).name).match("index") },
-+                { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") },
-+                { assert snapshot(process.out.versions).match("versions") }
-+            )
-+        }
-+
-+    }
-+
-+    test("homo_sapiens-stub") {
-+
-+        options '-stub'
-+
-+        when {
-+            process {
-+                """
-+                input[0] = Channel.of([
-+                    [ id:'test_fasta' ],
-+                    [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
-+                ])
-+                input[1] = Channel.of([
-+                    [ id:'test_gtf' ],
-+                    [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
-+                ])
-+                """
-+            }
-+        }
-+
-+        then {
-+            assertAll(
-+                { assert process.success },
-+                { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") },
-+                { assert snapshot(process.out.versions).match("versions") }
-+            )
-+        }
-+
-+    }
-+
-+    test("homo_sapiens-without_gtf") {
-+
-+        when {
-+            process {
-+                """
-+                input[0] = Channel.of([
-+                    [ id:'test_fasta' ],
-+                    [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
-+                ])
-+                input[1] = Channel.of([ [], [] ])
-+                """
-+            }
-+        }
-+
-+        then {
-+            assertAll(
-+                { assert process.success },
-+                { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") },
-+                { assert snapshot(process.out.versions).match("versions") }
-+            )
-+        }
-+
-+    }
-+
-+    test("homo_sapiens-without_gtf-stub") {
-+
-+        options '-stub'
-+
-+        when {
-+            process {
-+                """
-+                input[0] = Channel.of([
-+                    [ id:'test_fasta' ],
-+                    [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
-+                ])
-+                input[1] = Channel.of([ [], [] ])
-+                """
-+            }
-+        }
-+
-+        then {
-+            assertAll(
-+                { assert process.success },
-+                { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") },
-                 { assert snapshot(process.out.versions).match("versions") }
-             )
-         }
-
-************************************************************

From c526933df6406fae336c799fd2e1c2c725916dcb Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 21 Dec 2023 10:23:21 +1300
Subject: [PATCH 32/59] FASTP now has stub

---
 modules.json                                  | 327 ++++++++++--------
 modules/nf-core/fastp/fastp.diff              |  28 ++
 modules/nf-core/fastp/main.nf                 |  18 +
 .../fastq_fastqc_umitools_fastp/main.nf       |   3 +
 4 files changed, 237 insertions(+), 139 deletions(-)
 create mode 100644 modules/nf-core/fastp/fastp.diff

diff --git a/modules.json b/modules.json
index dfc1e93..dc6674b 100644
--- a/modules.json
+++ b/modules.json
@@ -1,142 +1,191 @@
 {
-  "name": "PlantandFoodResearch/pangene",
-  "homePage": "https://github.com/PlantandFoodResearch/pangene",
-  "repos": {
-    "git@github.com:PlantandFoodResearch/nxf-modules.git": {
-      "modules": {
-        "pfr": {
-          "custom/restoregffids": {
-            "branch": "main",
-            "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
-            "installed_by": ["fasta_edta_lai", "modules"]
-          },
-          "custom/shortenfastaids": {
-            "branch": "main",
-            "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
-            "installed_by": ["fasta_edta_lai", "modules"]
-          },
-          "edta/edta": {
-            "branch": "main",
-            "git_sha": "d14b1f1d790cc01c11f8ec5aa80a9562c6808b20",
-            "installed_by": ["fasta_edta_lai", "modules"]
-          },
-          "lai": {
-            "branch": "main",
-            "git_sha": "7e6e3cb41362a045c6bb6065903efa0eba246e87",
-            "installed_by": ["fasta_edta_lai"]
-          },
-          "liftoff": {
-            "branch": "main",
-            "git_sha": "3593ec100c92b656204bf739a51d62fd44d81f6f",
-            "installed_by": ["modules"]
-          }
+    "name": "PlantandFoodResearch/pangene",
+    "homePage": "https://github.com/PlantandFoodResearch/pangene",
+    "repos": {
+        "git@github.com:PlantandFoodResearch/nxf-modules.git": {
+            "modules": {
+                "pfr": {
+                    "custom/restoregffids": {
+                        "branch": "main",
+                        "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
+                        "installed_by": [
+                            "fasta_edta_lai",
+                            "modules"
+                        ]
+                    },
+                    "custom/shortenfastaids": {
+                        "branch": "main",
+                        "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
+                        "installed_by": [
+                            "fasta_edta_lai",
+                            "modules"
+                        ]
+                    },
+                    "edta/edta": {
+                        "branch": "main",
+                        "git_sha": "d14b1f1d790cc01c11f8ec5aa80a9562c6808b20",
+                        "installed_by": [
+                            "fasta_edta_lai",
+                            "modules"
+                        ]
+                    },
+                    "lai": {
+                        "branch": "main",
+                        "git_sha": "7e6e3cb41362a045c6bb6065903efa0eba246e87",
+                        "installed_by": [
+                            "fasta_edta_lai"
+                        ]
+                    },
+                    "liftoff": {
+                        "branch": "main",
+                        "git_sha": "3593ec100c92b656204bf739a51d62fd44d81f6f",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    }
+                }
+            },
+            "subworkflows": {
+                "pfr": {
+                    "fasta_edta_lai": {
+                        "branch": "main",
+                        "git_sha": "fde2f37998ba54ec9c0b5cf65a2f28f14af981b0",
+                        "installed_by": [
+                            "subworkflows"
+                        ]
+                    }
+                }
+            }
+        },
+        "git@github.com:kherronism/nf-modules.git": {
+            "modules": {
+                "kherronism": {
+                    "braker3": {
+                        "branch": "dev",
+                        "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "repeatmasker": {
+                        "branch": "dev",
+                        "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    }
+                }
+            }
+        },
+        "https://github.com/nf-core/modules.git": {
+            "modules": {
+                "nf-core": {
+                    "cat/cat": {
+                        "branch": "master",
+                        "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "cat/fastq": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "custom/dumpsoftwareversions": {
+                        "branch": "master",
+                        "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "fastavalidator": {
+                        "branch": "master",
+                        "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "fastp": {
+                        "branch": "master",
+                        "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde",
+                        "installed_by": [
+                            "fastq_fastqc_umitools_fastp"
+                        ],
+                        "patch": "modules/nf-core/fastp/fastp.diff"
+                    },
+                    "fastqc": {
+                        "branch": "master",
+                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
+                        "installed_by": [
+                            "fastq_fastqc_umitools_fastp",
+                            "modules"
+                        ]
+                    },
+                    "gffread": {
+                        "branch": "master",
+                        "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "gunzip": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "samtools/cat": {
+                        "branch": "master",
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "sortmerna": {
+                        "branch": "master",
+                        "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "star/align": {
+                        "branch": "master",
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "star/genomegenerate": {
+                        "branch": "master",
+                        "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "umitools/extract": {
+                        "branch": "master",
+                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
+                        "installed_by": [
+                            "fastq_fastqc_umitools_fastp"
+                        ]
+                    }
+                }
+            },
+            "subworkflows": {
+                "nf-core": {
+                    "fastq_fastqc_umitools_fastp": {
+                        "branch": "master",
+                        "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
+                        "installed_by": [
+                            "subworkflows"
+                        ]
+                    }
+                }
+            }
         }
-      },
-      "subworkflows": {
-        "pfr": {
-          "fasta_edta_lai": {
-            "branch": "main",
-            "git_sha": "fde2f37998ba54ec9c0b5cf65a2f28f14af981b0",
-            "installed_by": ["subworkflows"]
-          }
-        }
-      }
-    },
-    "git@github.com:kherronism/nf-modules.git": {
-      "modules": {
-        "kherronism": {
-          "braker3": {
-            "branch": "dev",
-            "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
-            "installed_by": ["modules"]
-          },
-          "repeatmasker": {
-            "branch": "dev",
-            "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
-            "installed_by": ["modules"]
-          }
-        }
-      }
-    },
-    "https://github.com/nf-core/modules.git": {
-      "modules": {
-        "nf-core": {
-          "cat/cat": {
-            "branch": "master",
-            "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
-            "installed_by": ["modules"]
-          },
-          "cat/fastq": {
-            "branch": "master",
-            "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-            "installed_by": ["modules"]
-          },
-          "custom/dumpsoftwareversions": {
-            "branch": "master",
-            "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
-            "installed_by": ["modules"]
-          },
-          "fastavalidator": {
-            "branch": "master",
-            "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382",
-            "installed_by": ["modules"]
-          },
-          "fastp": {
-            "branch": "master",
-            "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde",
-            "installed_by": ["fastq_fastqc_umitools_fastp"]
-          },
-          "fastqc": {
-            "branch": "master",
-            "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
-            "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
-          },
-          "gffread": {
-            "branch": "master",
-            "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8",
-            "installed_by": ["modules"]
-          },
-          "gunzip": {
-            "branch": "master",
-            "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-            "installed_by": ["modules"]
-          },
-          "samtools/cat": {
-            "branch": "master",
-            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-            "installed_by": ["modules"]
-          },
-          "sortmerna": {
-            "branch": "master",
-            "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
-            "installed_by": ["modules"]
-          },
-          "star/align": {
-            "branch": "master",
-            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-            "installed_by": ["modules"]
-          },
-          "star/genomegenerate": {
-            "branch": "master",
-            "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a",
-            "installed_by": ["modules"]
-          },
-          "umitools/extract": {
-            "branch": "master",
-            "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
-            "installed_by": ["fastq_fastqc_umitools_fastp"]
-          }
-        }
-      },
-      "subworkflows": {
-        "nf-core": {
-          "fastq_fastqc_umitools_fastp": {
-            "branch": "master",
-            "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
-            "installed_by": ["subworkflows"]
-          }
-        }
-      }
     }
-  }
-}
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastp/fastp.diff b/modules/nf-core/fastp/fastp.diff
new file mode 100644
index 0000000..4213043
--- /dev/null
+++ b/modules/nf-core/fastp/fastp.diff
@@ -0,0 +1,28 @@
+Changes in module 'nf-core/fastp'
+--- modules/nf-core/fastp/main.nf
++++ modules/nf-core/fastp/main.nf
+@@ -99,4 +99,22 @@
+         END_VERSIONS
+         """
+     }
++    
++    stub:
++    def prefix              = task.ext.prefix ?: "${meta.id}"
++    def is_single_output    = task.ext.args?.contains('--interleaved_in') || meta.single_end
++    def touch_reads         = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz"
++    def touch_merged        = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : ""
++    """
++    touch $touch_reads
++    touch "${prefix}.fastp.json"
++    touch "${prefix}.fastp.html"
++    touch "${prefix}.fastp.log"
++    $touch_merged
++
++    cat <<-END_VERSIONS > versions.yml
++    "${task.process}":
++        fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
++    END_VERSIONS
++    """
+ }
+
+************************************************************
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
index 5fac3c1..1f56640 100644
--- a/modules/nf-core/fastp/main.nf
+++ b/modules/nf-core/fastp/main.nf
@@ -99,4 +99,22 @@ process FASTP {
         END_VERSIONS
         """
     }
+    
+    stub:
+    def prefix              = task.ext.prefix ?: "${meta.id}"
+    def is_single_output    = task.ext.args?.contains('--interleaved_in') || meta.single_end
+    def touch_reads         = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz"
+    def touch_merged        = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : ""
+    """
+    touch $touch_reads
+    touch "${prefix}.fastp.json"
+    touch "${prefix}.fastp.html"
+    touch "${prefix}.fastp.log"
+    $touch_merged
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+    END_VERSIONS
+    """
 }
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
index 3dbb27e..711210f 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
@@ -13,6 +13,9 @@ include { FASTP                 } from '../../../modules/nf-core/fastp/main'
 import groovy.json.JsonSlurper
 
 def getFastpReadsAfterFiltering(json_file) {
+
+    if (!json_file.text) { return 0 } // Usman Rashid: To allow -stub with FASTP
+
     def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary')
     return json['after_filtering']['total_reads'].toLong()
 }

From ac45bbb4b07afa2f1997b80aaa25a6d85a8c395e Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 21 Dec 2023 12:38:15 +1300
Subject: [PATCH 33/59] SORTMERNA now has stub

---
 modules.json                                  |  3 +-
 modules/nf-core/sortmerna/main.nf             | 26 ++++++++++++++
 modules/nf-core/sortmerna/sortmerna.diff      | 36 +++++++++++++++++++
 nextflow.config                               |  8 ++---
 subworkflows/local/preprocess_rnaseq.nf       |  8 +++++
 .../fastq_fastqc_umitools_fastp/main.nf       |  6 ++--
 6 files changed, 79 insertions(+), 8 deletions(-)
 create mode 100644 modules/nf-core/sortmerna/sortmerna.diff

diff --git a/modules.json b/modules.json
index dc6674b..7cf4c73 100644
--- a/modules.json
+++ b/modules.json
@@ -150,7 +150,8 @@
                         "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
                         "installed_by": [
                             "modules"
-                        ]
+                        ],
+                        "patch": "modules/nf-core/sortmerna/sortmerna.diff"
                     },
                     "star/align": {
                         "branch": "master",
diff --git a/modules/nf-core/sortmerna/main.nf b/modules/nf-core/sortmerna/main.nf
index 53ccb97..909a7b1 100644
--- a/modules/nf-core/sortmerna/main.nf
+++ b/modules/nf-core/sortmerna/main.nf
@@ -67,4 +67,30 @@ process SORTMERNA {
         END_VERSIONS
         """
     }
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    if (meta.single_end) {
+        """
+        touch ${prefix}.non_rRNA.fastq.gz
+        touch ${prefix}.sortmerna.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
+        END_VERSIONS
+        """
+    } else {
+        """
+        touch ${prefix}_1.non_rRNA.fastq.gz
+        touch ${prefix}_2.non_rRNA.fastq.gz
+        touch ${prefix}.sortmerna.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
+        END_VERSIONS
+        """
+    }
 }
diff --git a/modules/nf-core/sortmerna/sortmerna.diff b/modules/nf-core/sortmerna/sortmerna.diff
new file mode 100644
index 0000000..66d58d5
--- /dev/null
+++ b/modules/nf-core/sortmerna/sortmerna.diff
@@ -0,0 +1,36 @@
+Changes in module 'nf-core/sortmerna'
+--- modules/nf-core/sortmerna/main.nf
++++ modules/nf-core/sortmerna/main.nf
+@@ -67,4 +67,30 @@
+         END_VERSIONS
+         """
+     }
++
++    stub:
++    def args = task.ext.args ?: ''
++    def prefix = task.ext.prefix ?: "${meta.id}"
++    if (meta.single_end) {
++        """
++        touch ${prefix}.non_rRNA.fastq.gz
++        touch ${prefix}.sortmerna.log
++
++        cat <<-END_VERSIONS > versions.yml
++        "${task.process}":
++            sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
++        END_VERSIONS
++        """
++    } else {
++        """
++        touch ${prefix}_1.non_rRNA.fastq.gz
++        touch ${prefix}_2.non_rRNA.fastq.gz
++        touch ${prefix}.sortmerna.log
++
++        cat <<-END_VERSIONS > versions.yml
++        "${task.process}":
++            sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
++        END_VERSIONS
++        """
++    }
+ }
+
+************************************************************
diff --git a/nextflow.config b/nextflow.config
index d861331..587694a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -33,16 +33,16 @@ params {
     min_trimmed_reads           = 10000
     extra_fastp_args            = ""
 
-    save_trimmed                = false
+    save_trimmed                = true
     // toggling this parameter results in rerun of FASTP and FASTQC_TRIM
 
-    remove_ribo_rna             = false
-    save_non_ribo_reads         = false
+    remove_ribo_rna             = true
+    save_non_ribo_reads         = true
     ribo_database_manifest      = "${projectDir}/assets/rrna-db-defaults.txt"
 
     star_max_intron_length      = 16000
     star_align_extra_args       = ""
-    star_save_outputs           = false
+    star_save_outputs           = true
 
     external_protein_fastas     = [
         "/workspace/ComparativeDataSources/OrthoDB11/Viridiplantae.fa.gz",
diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf
index 7a82786..9184808 100644
--- a/subworkflows/local/preprocess_rnaseq.nf
+++ b/subworkflows/local/preprocess_rnaseq.nf
@@ -70,6 +70,14 @@ workflow PREPROCESS_RNASEQ {
     .reads
     | set { ch_trim_reads }
 
+    ch_cat_fastq
+    | join(ch_trim_reads, remainder:true)
+    | map { meta, reads, trimmed ->
+        if (!trimmed) {
+            System.err.println("WARNING: Dropping ${reads.collect { it.getName() }} as read count after trimming is less than $min_trimmed_reads")
+        }
+    }
+
     // MODULE: SORTMERNA
     if (remove_ribo_rna) {
         SORTMERNA (
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
index 711210f..2c67b3c 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
@@ -12,9 +12,9 @@ include { FASTP                 } from '../../../modules/nf-core/fastp/main'
 //
 import groovy.json.JsonSlurper
 
-def getFastpReadsAfterFiltering(json_file) {
+def getFastpReadsAfterFiltering(json_file, min_trimmed_reads) {
 
-    if (!json_file.text) { return 0 } // Usman Rashid: To allow -stub with FASTP
+    if (!json_file.text) { return min_trimmed_reads } // Usman Rashid: To allow -stub with FASTP
 
     def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary')
     return json['after_filtering']['total_reads'].toLong()
@@ -99,7 +99,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP {
             .out
             .reads
             .join(trim_json)
-            .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json) ] }
+            .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json, min_trimmed_reads) ] }
             .set { ch_num_trimmed_reads }
 
         ch_num_trimmed_reads

From ed6aa33f6b775bb436957b8d9a3d40d41c85a30b Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 21 Dec 2023 13:09:44 +1300
Subject: [PATCH 34/59] Cleaned up prepare_assembly

---
 modules.json                           | 330 +++++++++++--------------
 subworkflows/local/prepare_assembly.nf | 134 +++++-----
 2 files changed, 204 insertions(+), 260 deletions(-)

diff --git a/modules.json b/modules.json
index 7cf4c73..299e449 100644
--- a/modules.json
+++ b/modules.json
@@ -1,192 +1,144 @@
 {
-    "name": "PlantandFoodResearch/pangene",
-    "homePage": "https://github.com/PlantandFoodResearch/pangene",
-    "repos": {
-        "git@github.com:PlantandFoodResearch/nxf-modules.git": {
-            "modules": {
-                "pfr": {
-                    "custom/restoregffids": {
-                        "branch": "main",
-                        "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
-                        "installed_by": [
-                            "fasta_edta_lai",
-                            "modules"
-                        ]
-                    },
-                    "custom/shortenfastaids": {
-                        "branch": "main",
-                        "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
-                        "installed_by": [
-                            "fasta_edta_lai",
-                            "modules"
-                        ]
-                    },
-                    "edta/edta": {
-                        "branch": "main",
-                        "git_sha": "d14b1f1d790cc01c11f8ec5aa80a9562c6808b20",
-                        "installed_by": [
-                            "fasta_edta_lai",
-                            "modules"
-                        ]
-                    },
-                    "lai": {
-                        "branch": "main",
-                        "git_sha": "7e6e3cb41362a045c6bb6065903efa0eba246e87",
-                        "installed_by": [
-                            "fasta_edta_lai"
-                        ]
-                    },
-                    "liftoff": {
-                        "branch": "main",
-                        "git_sha": "3593ec100c92b656204bf739a51d62fd44d81f6f",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    }
-                }
-            },
-            "subworkflows": {
-                "pfr": {
-                    "fasta_edta_lai": {
-                        "branch": "main",
-                        "git_sha": "fde2f37998ba54ec9c0b5cf65a2f28f14af981b0",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
-                    }
-                }
-            }
-        },
-        "git@github.com:kherronism/nf-modules.git": {
-            "modules": {
-                "kherronism": {
-                    "braker3": {
-                        "branch": "dev",
-                        "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "repeatmasker": {
-                        "branch": "dev",
-                        "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    }
-                }
-            }
-        },
-        "https://github.com/nf-core/modules.git": {
-            "modules": {
-                "nf-core": {
-                    "cat/cat": {
-                        "branch": "master",
-                        "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "cat/fastq": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "custom/dumpsoftwareversions": {
-                        "branch": "master",
-                        "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "fastavalidator": {
-                        "branch": "master",
-                        "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "fastp": {
-                        "branch": "master",
-                        "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde",
-                        "installed_by": [
-                            "fastq_fastqc_umitools_fastp"
-                        ],
-                        "patch": "modules/nf-core/fastp/fastp.diff"
-                    },
-                    "fastqc": {
-                        "branch": "master",
-                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
-                        "installed_by": [
-                            "fastq_fastqc_umitools_fastp",
-                            "modules"
-                        ]
-                    },
-                    "gffread": {
-                        "branch": "master",
-                        "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "gunzip": {
-                        "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "samtools/cat": {
-                        "branch": "master",
-                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "sortmerna": {
-                        "branch": "master",
-                        "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
-                        "installed_by": [
-                            "modules"
-                        ],
-                        "patch": "modules/nf-core/sortmerna/sortmerna.diff"
-                    },
-                    "star/align": {
-                        "branch": "master",
-                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "star/genomegenerate": {
-                        "branch": "master",
-                        "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "umitools/extract": {
-                        "branch": "master",
-                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
-                        "installed_by": [
-                            "fastq_fastqc_umitools_fastp"
-                        ]
-                    }
-                }
-            },
-            "subworkflows": {
-                "nf-core": {
-                    "fastq_fastqc_umitools_fastp": {
-                        "branch": "master",
-                        "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
-                    }
-                }
-            }
+  "name": "PlantandFoodResearch/pangene",
+  "homePage": "https://github.com/PlantandFoodResearch/pangene",
+  "repos": {
+    "git@github.com:PlantandFoodResearch/nxf-modules.git": {
+      "modules": {
+        "pfr": {
+          "custom/restoregffids": {
+            "branch": "main",
+            "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
+            "installed_by": ["fasta_edta_lai", "modules"]
+          },
+          "custom/shortenfastaids": {
+            "branch": "main",
+            "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
+            "installed_by": ["fasta_edta_lai", "modules"]
+          },
+          "edta/edta": {
+            "branch": "main",
+            "git_sha": "d14b1f1d790cc01c11f8ec5aa80a9562c6808b20",
+            "installed_by": ["fasta_edta_lai", "modules"]
+          },
+          "lai": {
+            "branch": "main",
+            "git_sha": "7e6e3cb41362a045c6bb6065903efa0eba246e87",
+            "installed_by": ["fasta_edta_lai"]
+          },
+          "liftoff": {
+            "branch": "main",
+            "git_sha": "3593ec100c92b656204bf739a51d62fd44d81f6f",
+            "installed_by": ["modules"]
+          }
         }
+      },
+      "subworkflows": {
+        "pfr": {
+          "fasta_edta_lai": {
+            "branch": "main",
+            "git_sha": "fde2f37998ba54ec9c0b5cf65a2f28f14af981b0",
+            "installed_by": ["subworkflows"]
+          }
+        }
+      }
+    },
+    "git@github.com:kherronism/nf-modules.git": {
+      "modules": {
+        "kherronism": {
+          "braker3": {
+            "branch": "dev",
+            "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
+            "installed_by": ["modules"]
+          },
+          "repeatmasker": {
+            "branch": "dev",
+            "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
+            "installed_by": ["modules"]
+          }
+        }
+      }
+    },
+    "https://github.com/nf-core/modules.git": {
+      "modules": {
+        "nf-core": {
+          "cat/cat": {
+            "branch": "master",
+            "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
+            "installed_by": ["modules"]
+          },
+          "cat/fastq": {
+            "branch": "master",
+            "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+            "installed_by": ["modules"]
+          },
+          "custom/dumpsoftwareversions": {
+            "branch": "master",
+            "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
+            "installed_by": ["modules"]
+          },
+          "fastavalidator": {
+            "branch": "master",
+            "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382",
+            "installed_by": ["modules"]
+          },
+          "fastp": {
+            "branch": "master",
+            "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde",
+            "installed_by": ["fastq_fastqc_umitools_fastp"],
+            "patch": "modules/nf-core/fastp/fastp.diff"
+          },
+          "fastqc": {
+            "branch": "master",
+            "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
+            "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
+          },
+          "gffread": {
+            "branch": "master",
+            "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8",
+            "installed_by": ["modules"]
+          },
+          "gunzip": {
+            "branch": "master",
+            "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+            "installed_by": ["modules"]
+          },
+          "samtools/cat": {
+            "branch": "master",
+            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+            "installed_by": ["modules"]
+          },
+          "sortmerna": {
+            "branch": "master",
+            "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
+            "installed_by": ["modules"],
+            "patch": "modules/nf-core/sortmerna/sortmerna.diff"
+          },
+          "star/align": {
+            "branch": "master",
+            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+            "installed_by": ["modules"]
+          },
+          "star/genomegenerate": {
+            "branch": "master",
+            "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a",
+            "installed_by": ["modules"]
+          },
+          "umitools/extract": {
+            "branch": "master",
+            "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
+            "installed_by": ["fastq_fastqc_umitools_fastp"]
+          }
+        }
+      },
+      "subworkflows": {
+        "nf-core": {
+          "fastq_fastqc_umitools_fastp": {
+            "branch": "master",
+            "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
+            "installed_by": ["subworkflows"]
+          }
+        }
+      }
     }
-}
\ No newline at end of file
+  }
+}
diff --git a/subworkflows/local/prepare_assembly.nf b/subworkflows/local/prepare_assembly.nf
index db2e3c3..ed32afb 100644
--- a/subworkflows/local/prepare_assembly.nf
+++ b/subworkflows/local/prepare_assembly.nf
@@ -8,34 +8,35 @@ include { FASTA_EDTA_LAI                        } from '../../subworkflows/pfr/f
 
 workflow PREPARE_ASSEMBLY {
     take:
-    target_assembly     // channel: [ meta, fasta ]
-    te_library          // channel: [ meta, fasta ]
+    target_assembly             // channel: [ meta, fasta ]
+    te_library                  // channel: [ meta, fasta ]
 
     main:
+    ch_versions                 = Channel.empty()
+
     // MODULE: GUNZIP_TARGET_ASSEMBLY
-    target_assembly
-    | branch { meta, file ->
-        gz: "$file".endsWith(".gz")
-        rest: !"$file".endsWith(".gz")
-    }
-    | set { tech_target_assembly_branch }
+    target_assembly_branch      = target_assembly
+                                | branch { meta, file ->
+                                    gz: "$file".endsWith(".gz")
+                                    rest: !"$file".endsWith(".gz")
+                                }
+
+    GUNZIP_TARGET_ASSEMBLY ( target_assembly_branch.gz )
+
+    ch_gunzip_assembly          = GUNZIP_TARGET_ASSEMBLY.out.gunzip
+                                | mix(
+                                    target_assembly_branch.rest
+                                )
+    ch_versions                 = ch_versions.mix(GUNZIP_TARGET_ASSEMBLY.out.versions.first())
 
-    GUNZIP_TARGET_ASSEMBLY(
-        tech_target_assembly_branch.gz
-    )
-    .gunzip
-    | mix(
-        tech_target_assembly_branch.rest
-    )
-    | set { ch_gunzip_target_assembly }
 
     // MODULE: FASTAVALIDATOR
-    FASTAVALIDATOR(ch_gunzip_target_assembly)
+    FASTAVALIDATOR ( ch_gunzip_assembly )
 
-    ch_gunzip_target_assembly
-    | join(FASTAVALIDATOR.out.success_log)
-    | map { meta, fasta, log -> [ meta, fasta ] }
-    | set { ch_validated_target_assembly }
+    ch_validated_assembly       = ch_gunzip_assembly
+                                | join(FASTAVALIDATOR.out.success_log)
+                                | map { meta, fasta, log -> [ meta, fasta ] }
+    ch_versions                 = ch_versions.mix(FASTAVALIDATOR.out.versions.first())
 
     FASTAVALIDATOR.out.error_log
     | map { meta, log ->
@@ -43,72 +44,63 @@ workflow PREPARE_ASSEMBLY {
     }
 
     // MODULE: GUNZIP_TE_LIBRARY
-    te_library
-    | branch { meta, file ->
-        gz: "$file".endsWith(".gz")
-        rest: !"$file".endsWith(".gz")
-    }
-    | set { ch_te_library_branch }
+    ch_te_library_branch        = te_library
+                                | branch { meta, file ->
+                                    gz: "$file".endsWith(".gz")
+                                    rest: !"$file".endsWith(".gz")
+                                }
 
-    GUNZIP_TE_LIBRARY(
-        ch_te_library_branch.gz
-    )
-    .gunzip
-    | mix(
-        ch_te_library_branch.rest
-    )
-    | set { ch_gunzip_te_library }
+    GUNZIP_TE_LIBRARY ( ch_te_library_branch.gz )
+
+    ch_gunzip_te_library        = GUNZIP_TE_LIBRARY.out.gunzip
+                                | mix(
+                                    ch_te_library_branch.rest
+                                )
+    ch_versions                 = ch_versions.mix(GUNZIP_TE_LIBRARY.out.versions.first())
 
     // SUBWORKFLOW: FASTA_EDTA_LAI
-    ch_validated_target_assembly
-    | join(
-        ch_gunzip_te_library, remainder: true
-    )
-    | filter { meta, assembly, teLib ->
-        teLib == null
-    }
-    | map { meta, assembly, teLib -> [meta, assembly] }
-    | set { ch_edta_inputs }
+    ch_edta_inputs              = ch_validated_assembly
+                                | join(
+                                    ch_gunzip_te_library, remainder: true
+                                )
+                                | filter { meta, assembly, teLib ->
+                                    teLib == null
+                                }
+                                | map { meta, assembly, teLib -> [meta, assembly] }
     
-    FASTA_EDTA_LAI (
+    FASTA_EDTA_LAI(
         ch_edta_inputs,
         [],
         true // Skip LAI
     )
     
-    // MODULE: REPEATMASKER
-    ch_validated_target_assembly
-    | join(
-        FASTA_EDTA_LAI.out.te_lib_fasta.mix(ch_gunzip_te_library)
-    )
-    | set { ch_assembly_n_te_lib }
+    ch_assembly_and_te_lib      = ch_validated_assembly
+                                | join(
+                                    FASTA_EDTA_LAI.out.te_lib_fasta.mix(ch_gunzip_te_library)
+                                )
 
+    ch_versions                 = ch_versions.mix(FASTA_EDTA_LAI.out.versions) // mix the whole channel, as before this cleanup: first() would keep only one versions.yml from the subworkflow
+    
+    // MODULE: REPEATMASKER
     REPEATMASKER(
-        ch_assembly_n_te_lib.map { meta, assembly, teLib -> [meta, assembly] },
-        ch_assembly_n_te_lib.map { meta, assembly, teLib -> teLib },
+        ch_assembly_and_te_lib.map { meta, assembly, teLib -> [meta, assembly] },
+        ch_assembly_and_te_lib.map { meta, assembly, teLib -> teLib },
     )
 
+    ch_versions                 = ch_versions.mix(REPEATMASKER.out.versions.first())
+
     // MODULE: STAR_GENOMEGENERATE
-    def star_ignore_sjdbgtf = true
     STAR_GENOMEGENERATE(
-        ch_validated_target_assembly,
-        ch_validated_target_assembly.map { meta, fasta -> [ [], [] ] }
+        ch_validated_assembly,
+        ch_validated_assembly.map { meta, fasta -> [ [], [] ] }
     )
-    .index
-    | set { ch_assembly_index }
-
-    Channel.empty()
-    | mix(FASTAVALIDATOR.out.versions.first())
-    | mix(GUNZIP_TE_LIBRARY.out.versions.first())
-    | mix(FASTA_EDTA_LAI.out.versions)
-    | mix(REPEATMASKER.out.versions.first())
-    | mix(STAR_GENOMEGENERATE.out.versions.first())
-    | mix(GUNZIP_TARGET_ASSEMBLY.out.versions.first())
-    | set { ch_versions }
+
+    ch_assembly_index           = STAR_GENOMEGENERATE.out.index
+    ch_versions                 = ch_versions.mix(STAR_GENOMEGENERATE.out.versions.first())
     
     emit:
-    target_assemby          = ch_validated_target_assembly  // channel: [ meta, fasta ]
-    masked_target_assembly  = REPEATMASKER.out.fasta_masked // channel: [ meta, fasta ]
-    target_assemby_index    = ch_assembly_index             // channel: [ meta, star_index ]
-    versions                = ch_versions                   // channel: [ versions.yml ]
+    target_assemby              = ch_validated_assembly         // channel: [ meta, fasta ]
+    masked_target_assembly      = REPEATMASKER.out.fasta_masked // channel: [ meta, fasta ]
+    target_assemby_index        = ch_assembly_index             // channel: [ meta, star_index ]
+    versions                    = ch_versions                   // channel: [ versions.yml ]
 }
\ No newline at end of file

From 144edb244a50ac05acb770da6e49882c21a6cf47 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 21 Dec 2023 13:20:32 +1300
Subject: [PATCH 35/59] Cleaned up preprocess_rnaseq

---
 subworkflows/local/preprocess_rnaseq.nf | 100 ++++++++++++------------
 1 file changed, 49 insertions(+), 51 deletions(-)

diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf
index 9184808..ba444bb 100644
--- a/subworkflows/local/preprocess_rnaseq.nf
+++ b/subworkflows/local/preprocess_rnaseq.nf
@@ -6,7 +6,7 @@ include { FASTQ_FASTQC_UMITOOLS_FASTP   } from '../../subworkflows/nf-core/fastq
 workflow PREPROCESS_RNASEQ {
     take:
     samplesheet                     // path: csv
-    permissible_target_assemblies   // val: assembly_a,assembly_b
+    permissible_assemblies          // val: assembly_a,assembly_b
     skip_fastqc                     // val: true|false
     skip_fastp                      // val: true|false
     save_trimmed                    // val: true|false
@@ -16,45 +16,46 @@ workflow PREPROCESS_RNASEQ {
     
     main:
     ch_versions = Channel.empty()
+    
     // SUBWORKFLOW: EXTRACT_SAMPLES
     EXTRACT_SAMPLES(
         samplesheet,
-        permissible_target_assemblies
+        permissible_assemblies
     )
-    .reads
-    | map { meta, fastq ->
-        groupID = meta.id - ~/_T\d+/
-        [ meta + [id: groupID], fastq ]
-    }
-    | groupTuple()
-    | branch { meta, fastq ->
-        single  : fastq.size() == 1
-            return [ meta, fastq.flatten() ]
-        multiple: fastq.size() > 1
-            return [ meta, fastq.flatten() ]
-    }
-    | set { ch_fastq }
 
-    EXTRACT_SAMPLES.out.assemblies
-    | map { meta, assembly ->
-        groupID = meta.id - ~/_T\d+/
-        [ meta + [id: groupID], assembly ]
-    }
-    | unique
-    | set { ch_reads_target }
+    ch_fastq                        = EXTRACT_SAMPLES.out.reads
+                                    | map { meta, fastq ->
+                                        def groupID = meta.id - ~/_T\d+/ // 'def' keeps groupID local to this closure call; removes the first _T<digits> match from the id
+                                        [ meta + [id: groupID], fastq ] // re-key on the shortened id so groupTuple() collects entries sharing it
+                                    }
+                                    | groupTuple()
+                                    | branch { meta, fastq ->
+                                        single  : fastq.size() == 1 // exactly one entry grouped under this id
+                                            return [ meta, fastq.flatten() ]
+                                        multiple: fastq.size() > 1 // several entries grouped under this id; this branch feeds CAT_FASTQ
+                                            return [ meta, fastq.flatten() ]
+                                    }
+
+    ch_reads_target                 = EXTRACT_SAMPLES.out.assemblies
+                                    | map { meta, assembly ->
+                                        def groupID = meta.id - ~/_T\d+/ // 'def' keeps groupID local to this closure call; removes the first _T<digits> match from the id
+                                        [ meta + [id: groupID], assembly ] // same shortened id as used for the reads channel
+                                    }
+                                    | unique
+
+    ch_versions                     = ch_versions.mix(EXTRACT_SAMPLES.out.versions)
 
     // MODULES: CAT_FASTQ
-    CAT_FASTQ (
-        ch_fastq.multiple
-    )
-    .reads
-    | mix(ch_fastq.single)
-    | set { ch_cat_fastq }
+    CAT_FASTQ ( ch_fastq.multiple )
+
+    ch_cat_fastq                    = CAT_FASTQ.out.reads.mix(ch_fastq.single)
+    ch_versions                     = ch_versions.mix(CAT_FASTQ.out.versions.first())
 
     // SUBWORKFLOW: FASTQ_FASTQC_UMITOOLS_FASTP
-    def with_umi            = false
-    def skip_umi_extract    = true
-    def umi_discard_read    = false
+    def with_umi                    = false
+    def skip_umi_extract            = true
+    def umi_discard_read            = false
+    
     FASTQ_FASTQC_UMITOOLS_FASTP (
         ch_cat_fastq,
         skip_fastqc,
@@ -67,8 +68,8 @@ workflow PREPROCESS_RNASEQ {
         save_trimmed,
         min_trimmed_reads
     )
-    .reads
-    | set { ch_trim_reads }
+
+    ch_trim_reads                   = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
 
     ch_cat_fastq
     | join(ch_trim_reads, remainder:true)
@@ -78,26 +79,23 @@ workflow PREPROCESS_RNASEQ {
         }
     }
 
-    // MODULE: SORTMERNA
-    if (remove_ribo_rna) {
-        SORTMERNA (
-            ch_trim_reads,
-            sortmerna_fastas
-        )
-        .reads
-        | set { ch_sortmerna_reads }
+    ch_versions                     = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) // mix the whole channel, as before this cleanup: first() would keep only one versions.yml from the subworkflow
 
-        ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
-    }
+    // MODULE: SORTMERNA
+    SORTMERNA(
+        remove_ribo_rna ? ch_trim_reads : Channel.empty(),
+        sortmerna_fastas
+    )
+    
+    ch_emitted_reads                = remove_ribo_rna
+                                    ? SORTMERNA.out.reads
+                                    : ch_trim_reads
+    ch_versions                     = ch_versions.mix(SORTMERNA.out.versions.first())
 
-    ch_versions
-    | mix(EXTRACT_SAMPLES.out.versions)
-    | mix(CAT_FASTQ.out.versions.first())
-    | mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
-    | set { ch_versions }
+    
 
     emit:
-    trim_reads      = remove_ribo_rna ? ch_sortmerna_reads : ch_trim_reads  // channel: [ meta, [ fq ] ]
-    reads_target    = ch_reads_target                                       // channel: [ meta, assembly_id ]
-    versions        = ch_versions                                           // channel: [ versions.yml ]
+    trim_reads                      = ch_emitted_reads  // channel: [ meta, [ fq ] ]
+    reads_target                    = ch_reads_target   // channel: [ meta, assembly_id ]
+    versions                        = ch_versions       // channel: [ versions.yml ]
 }
\ No newline at end of file

From a79c18729aebe6c72a7c34cf9c4a2fe0a7e5be4d Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 21 Dec 2023 13:50:49 +1300
Subject: [PATCH 36/59] Reformatted and inc ALIGN_RNASEQ

---
 subworkflows/local/align_rnaseq.nf | 105 +++++++++++++++--------------
 workflows/pangene.nf               |  18 ++---
 2 files changed, 64 insertions(+), 59 deletions(-)

diff --git a/subworkflows/local/align_rnaseq.nf b/subworkflows/local/align_rnaseq.nf
index c0a9039..544fea9 100644
--- a/subworkflows/local/align_rnaseq.nf
+++ b/subworkflows/local/align_rnaseq.nf
@@ -3,69 +3,74 @@ include { SAMTOOLS_CAT  } from '../../modules/nf-core/samtools/cat'
 
 workflow ALIGN_RNASEQ {
     take:
-    reads_target    // channel: [ meta, assembly_id ]
-    trim_reads      // channel: [ meta, [ fq ] ]
-    assembly_index  // channel: [ meta2, star_index ]
+    reads_target                // channel: [ meta, assembly_id ]
+    trim_reads                  // channel: [ meta, [ fq ] ]
+    assembly_index              // channel: [ meta2, star_index ]
     
     main:
+    ch_versions                 = Channel.empty()
+
     // MODULE: STAR_ALIGN
-    reads_target
-    | combine(trim_reads, by:0)
-    | map { meta, assembly, fastq ->
-        [assembly, [id:"${meta.id}.on.${assembly}", single_end:meta.single_end, target_assembly:assembly], fastq]
-    }
-    | combine(
-        assembly_index.map { meta, index -> [meta.id, index] },
-        by:0
-    )
-    | map { assembly, meta, fastq, index -> [meta, fastq, index] }
-    | set { ch_star_inputs }
+    ch_star_inputs              = reads_target
+                                | combine(trim_reads, by:0)
+                                | map { meta, assembly, fastq ->
+                                    [
+                                        assembly,
+                                        [
+                                            id: "${meta.id}.on.${assembly}",
+                                            single_end: meta.single_end,
+                                            target_assembly: assembly
+                                        ],
+                                        fastq
+                                    ]
+                                }
+                                | combine(
+                                    assembly_index.map { meta, index -> [ meta.id, index ] },
+                                    by:0
+                                )
+                                | map { assembly, meta, fastq, index -> [ meta, fastq, index ] }
 
-    def star_ignore_sjdbgtf = true
-    def seq_platform        = false
-    def seq_center          = false
+    def star_ignore_sjdbgtf     = true
+    def seq_platform            = false
+    def seq_center              = false
+    
     STAR_ALIGN(
-        ch_star_inputs.map { meta, fastq, index -> [meta, fastq] },
-        ch_star_inputs.map { meta, fastq, index -> [[id: meta.target_assembly], index] },
-        ch_star_inputs.map { meta, fastq, index -> [[id: meta.target_assembly], []] },
+        ch_star_inputs.map { meta, fastq, index -> [ meta, fastq ] },
+        ch_star_inputs.map { meta, fastq, index -> [ [ id: meta.target_assembly ], index ] },
+        ch_star_inputs.map { meta, fastq, index -> [ [ id: meta.target_assembly ], [] ] },
         star_ignore_sjdbgtf,
         seq_platform,
         seq_center
     )
-    .bam_sorted
-    | set { ch_star_bam }
+
+    ch_star_bam                 = STAR_ALIGN.out.bam_sorted
+    ch_versions                 = ch_versions.mix(STAR_ALIGN.out.versions.first())
 
     // MODULE: SAMTOOLS_CAT
-    ch_star_bam
-    | map { meta, bam ->
-        [
-            [id: meta.target_assembly],
-            bam instanceof List ? bam.find {it =~ /Aligned/} : bam
-        ]
-    }
-    | groupTuple
-    | branch { meta, bamList ->
-        bams: bamList.size() > 1
-        bam: bamList.size() <= 1
-    }
-    | set { ch_star_bam_branch }
+    ch_star_bam_branch          = ch_star_bam
+                                | map { meta, bam ->
+                                    [
+                                        [ id: meta.target_assembly ],
+                                        bam instanceof List ? bam.find { it =~ /Aligned/ } : bam
+                                    ]
+                                }
+                                | groupTuple
+                                | branch { meta, bamList ->
+                                    bams: bamList.size() > 1
+                                    bam: bamList.size() <= 1
+                                }
 
-    SAMTOOLS_CAT(
-        ch_star_bam_branch.bams
-    )
-    .bam
-    | map { meta, bam -> [meta, [bam]] }
-    | mix(
-        ch_star_bam_branch.bam
-    )
-    | set { ch_samtools_bam }
+    SAMTOOLS_CAT ( ch_star_bam_branch.bams )
 
-    Channel.empty()
-    | mix(STAR_ALIGN.out.versions.first())
-    | mix(SAMTOOLS_CAT.out.versions.first())
-    | set { ch_versions }
+    ch_samtools_bam             = SAMTOOLS_CAT.out.bam
+                                | map { meta, bam -> [meta, [bam]] }
+                                | mix(
+                                    ch_star_bam_branch.bam
+                                )
+    
+    ch_versions                 = ch_versions.mix(SAMTOOLS_CAT.out.versions.first())
     
     emit:
-    bam         = ch_samtools_bam   // channel: [ [ id, single_end, target_assembly ], [ bam ] ]
-    versions    = ch_versions       // channel: [ versions.yml ]
+    bam                         = ch_samtools_bam   // channel: [ [ id, single_end, target_assembly ], [ bam ] ]
+    versions                    = ch_versions       // channel: [ versions.yml ]
 }
\ No newline at end of file
diff --git a/workflows/pangene.nf b/workflows/pangene.nf
index 82fdbe4..53d8162 100644
--- a/workflows/pangene.nf
+++ b/workflows/pangene.nf
@@ -2,7 +2,7 @@ include { validateParams                } from '../modules/local/validate_params
 
 include { PREPARE_ASSEMBLY              } from '../subworkflows/local/prepare_assembly'
 include { PREPROCESS_RNASEQ             } from '../subworkflows/local/preprocess_rnaseq'
-// include { ALIGN_RNASEQ                  } from '../subworkflows/local/align_rnaseq'
+include { ALIGN_RNASEQ                  } from '../subworkflows/local/align_rnaseq'
 // include { PREPARE_EXT_PROTS             } from '../subworkflows/local/prepare_ext_prots'
 
 // include { BRAKER3                       } from '../modules/kherronism/braker3'
@@ -94,15 +94,15 @@ workflow PANGENE {
     ch_reads_target             = PREPROCESS_RNASEQ.out.reads_target
     ch_versions                 = ch_versions.mix(PREPROCESS_RNASEQ.out.versions)
 
-    // // SUBWORKFLOW: ALIGN_RNASEQ
-    // ALIGN_RNASEQ(
-    //     ch_reads_target,
-    //     ch_trim_reads,
-    //     ch_target_assemby_index
-    // )
+    // SUBWORKFLOW: ALIGN_RNASEQ
+    ALIGN_RNASEQ(
+        ch_reads_target,
+        ch_trim_reads,
+        ch_target_assemby_index
+    )
 
-    // ch_rnaseq_bam               = ALIGN_RNASEQ.out.bam
-    // ch_versions                 = ch_versions.mix(ALIGN_RNASEQ.out.versions)
+    ch_rnaseq_bam               = ALIGN_RNASEQ.out.bam
+    ch_versions                 = ch_versions.mix(ALIGN_RNASEQ.out.versions)
 
     // // MODULE: PREPARE_EXT_PROTS
     // PREPARE_EXT_PROTS(

From 6f61f5d3a688017c396275690f27163ee1560294 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 21 Dec 2023 14:13:50 +1300
Subject: [PATCH 37/59] Cleaned up and included PREPARE_EXT_PROTS

---
 TODO.md                                 |  2 ++
 nextflow.config                         |  4 +--
 subworkflows/local/prepare_ext_prots.nf | 46 +++++++++++--------------
 workflows/pangene.nf                    | 32 ++++++++---------
 4 files changed, 40 insertions(+), 44 deletions(-)

diff --git a/TODO.md b/TODO.md
index 94f51c1..2366b0a 100644
--- a/TODO.md
+++ b/TODO.md
@@ -13,3 +13,5 @@
 > https://www.biorxiv.org/content/10.1101/096529v2.full.pdf
 
 > Don't use `-exclude_partial`
+
+- [ ] Sort out EDTA testing
diff --git a/nextflow.config b/nextflow.config
index 587694a..ef139b4 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -45,8 +45,8 @@ params {
     star_save_outputs           = true
 
     external_protein_fastas     = [
-        "/workspace/ComparativeDataSources/OrthoDB11/Viridiplantae.fa.gz",
-        "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.pep.fasta"
+        ".test/ext_prots/Viridiplantae.fa.gz",
+        ".test/ext_prots/RU01.20221115150135.pep.fasta"
     ]
     // Optional: Set to null if not available
 
diff --git a/subworkflows/local/prepare_ext_prots.nf b/subworkflows/local/prepare_ext_prots.nf
index d14c60b..fff42ae 100644
--- a/subworkflows/local/prepare_ext_prots.nf
+++ b/subworkflows/local/prepare_ext_prots.nf
@@ -6,36 +6,30 @@ workflow PREPARE_EXT_PROTS {
     ch_ext_prot_fastas          // Channel: [ meta, fasta ]
     
     main:
-    ch_ext_prot_fastas
-    | branch { meta, file ->
-        gz: "$file".endsWith(".gz")
-        rest: !"$file".endsWith(".gz")
-    }
-    | set { ch_ext_prot_seqs_branch }
+    ch_versions                 = Channel.empty()
 
     // MODULE: GUNZIP
-    GUNZIP(
-        ch_ext_prot_seqs_branch.gz
-    )
-    .gunzip
-    | mix(
-        ch_ext_prot_seqs_branch.rest
-    )
-    | set { ch_ext_prot_gunzip_fastas }
+    ch_ext_prot_seqs_branch     = ch_ext_prot_fastas
+                                | branch { meta, file ->
+                                    gz: "$file".endsWith(".gz")
+                                    rest: !"$file".endsWith(".gz")
+                                }
+    
+    GUNZIP ( ch_ext_prot_seqs_branch.gz )
+    
+    ch_ext_prot_gunzip_fastas   = GUNZIP.out.gunzip.mix(ch_ext_prot_seqs_branch.rest)
+                                | map { meta, filePath -> filePath }
+                                | collect
+                                | map { fileList -> [ [ id: "ext_protein_seqs" ], fileList ] }
+    
+    ch_versions                 = ch_versions.mix(GUNZIP.out.versions.first())
 
-    // MODULE: CAT_PROTEIN_FASTAS
-    ch_ext_prot_gunzip_fastas
-    | map { meta, filePath -> filePath }
-    | collect
-    | map { fileList -> [[id:"ext_protein_seqs"], fileList] }
-    | CAT_PROTEIN_FASTAS
+    // MODULE: CAT_CAT as CAT_PROTEIN_FASTAS
+    CAT_PROTEIN_FASTAS ( ch_ext_prot_gunzip_fastas )
 
-    Channel.empty()
-    | mix(GUNZIP.out.versions.first())
-    | mix(CAT_PROTEIN_FASTAS.out.versions)
-    | set { ch_versions }
+    ch_versions                 = ch_versions.mix(CAT_PROTEIN_FASTAS.out.versions)
     
     emit:
-    ext_prots_fasta = CAT_PROTEIN_FASTAS.out.file_out   // Channel: [ meta, fasta ]
-    versions        = ch_versions                       // Channel: [ versions.yml ]
+    ext_prots_fasta             = CAT_PROTEIN_FASTAS.out.file_out   // Channel: [ meta, fasta ]
+    versions                    = ch_versions                       // Channel: [ versions.yml ]
 }
\ No newline at end of file
diff --git a/workflows/pangene.nf b/workflows/pangene.nf
index 53d8162..70ab2cc 100644
--- a/workflows/pangene.nf
+++ b/workflows/pangene.nf
@@ -3,7 +3,7 @@ include { validateParams                } from '../modules/local/validate_params
 include { PREPARE_ASSEMBLY              } from '../subworkflows/local/prepare_assembly'
 include { PREPROCESS_RNASEQ             } from '../subworkflows/local/preprocess_rnaseq'
 include { ALIGN_RNASEQ                  } from '../subworkflows/local/align_rnaseq'
-// include { PREPARE_EXT_PROTS             } from '../subworkflows/local/prepare_ext_prots'
+include { PREPARE_EXT_PROTS             } from '../subworkflows/local/prepare_ext_prots'
 
 // include { BRAKER3                       } from '../modules/kherronism/braker3'
 
@@ -19,12 +19,12 @@ workflow PANGENE {
 
     ch_target_assembly          = Channel.fromList(params.target_assemblies)
                                 | map { tag, filePath ->
-                                    [[id:tag], file(filePath, checkIfExists: true)]
+                                    [ [ id: tag ], file(filePath, checkIfExists: true) ]
                                 }
 
     ch_te_library               = Channel.fromList(params.te_libraries)
                                 | map { tag, filePath ->
-                                    [[id:tag], file(filePath, checkIfExists: true)]
+                                    [ [ id:tag ], file(filePath, checkIfExists: true) ]
                                 }
 
     ch_samplesheet              = params.samplesheet
@@ -46,13 +46,13 @@ workflow PANGENE {
                                 | collect
                                 : Channel.empty()
 
-    // ch_ext_prot_fastas          = params.external_protein_fastas
-    //                             ? Channel.fromList(params.external_protein_fastas)
-    //                             | map { filePath ->
-    //                                 def fileHandle = file(filePath, checkIfExists: true)
-    //                                 [[id:fileHandle.getSimpleName()], fileHandle]
-    //                             }
-    //                             : Channel.empty()
+    ch_ext_prot_fastas          = params.external_protein_fastas
+                                ? Channel.fromList(params.external_protein_fastas)
+                                | map { filePath ->
+                                    def fileHandle = file(filePath, checkIfExists: true)
+                                    [ [id: fileHandle.getSimpleName() ], fileHandle]
+                                }
+                                : Channel.empty()
     
     // ch_xref_annotations_mm      = params.liftoff_xref_annotations
     //                             ? Channel.fromList(params.liftoff_xref_annotations)
@@ -104,13 +104,13 @@ workflow PANGENE {
     ch_rnaseq_bam               = ALIGN_RNASEQ.out.bam
     ch_versions                 = ch_versions.mix(ALIGN_RNASEQ.out.versions)
 
-    // // MODULE: PREPARE_EXT_PROTS
-    // PREPARE_EXT_PROTS(
-    //     ch_ext_prot_fastas
-    // )
+    // MODULE: PREPARE_EXT_PROTS
+    PREPARE_EXT_PROTS(
+        ch_ext_prot_fastas
+    )
 
-    // ch_ext_prots_fasta          = PREPARE_EXT_PROTS.out.ext_prots_fasta
-    // ch_versions                 = ch_versions.mix(PREPARE_EXT_PROTS.out.versions)
+    ch_ext_prots_fasta          = PREPARE_EXT_PROTS.out.ext_prots_fasta
+    ch_versions                 = ch_versions.mix(PREPARE_EXT_PROTS.out.versions)
 
     // // MODULE: BRAKER3
     // ch_braker_inputs            = ch_masked_target_assembly

From 1184795c7a33d71591fa99aa56d23f6c7bdb68a3 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 21 Dec 2023 14:30:44 +1300
Subject: [PATCH 38/59] Cleaned up BRAKER3

---
 modules/kherronism/braker3/main.nf | 27 +++++++++---------
 workflows/pangene.nf               | 44 +++++++++++++++---------------
 2 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/modules/kherronism/braker3/main.nf b/modules/kherronism/braker3/main.nf
index d44c986..14fc08c 100644
--- a/modules/kherronism/braker3/main.nf
+++ b/modules/kherronism/braker3/main.nf
@@ -2,10 +2,10 @@ process BRAKER3 {
     tag "${meta.id}"
     label 'process_high'
 
-    conda "bioconda::braker3=3.0.3"
+    conda "bioconda::braker3=3.0.6"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'registry.hub.docker.com/teambraker/braker3:v.1.0.6':
-        'registry.hub.docker.com/teambraker/braker3:v.1.0.6' }"
+        'https://depot.galaxyproject.org/singularity/braker3%3A3.0.6--hdfd78af_0':
+        'biocontainers/braker3:3.0.6--hdfd78af_0' }"
 
     input:
     tuple val(meta), path(fasta)
@@ -29,14 +29,14 @@ process BRAKER3 {
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
-    prefix   = task.ext.prefix ?: "${meta.id}"
+    def args        = task.ext.args         ?: ''
+    prefix          = task.ext.prefix       ?: "${meta.id}"
 
-    def rna_ids  = rnaseq_sets_ids ? "--rnaseq_sets_ids=${rnaseq_sets_ids}" : ''
-    def rna_dirs = rnaseq_sets_dirs ? "--rnaseq_sets_dirs=${rnaseq_sets_dirs}" : ''
-    def bam      = bam ? "--bam=${bam}" : ''
-    def proteins = proteins ? "--prot_seq=${proteins}" : ''
-    def hints    = hintsfile ? "--hints=${hintsfile}" : ''
+    def rna_ids     = rnaseq_sets_ids       ? "--rnaseq_sets_ids=${rnaseq_sets_ids}"    : ''
+    def rna_dirs    = rnaseq_sets_dirs      ? "--rnaseq_sets_dirs=${rnaseq_sets_dirs}"  : ''
+    def bam         = bam                   ? "--bam=${bam}"                            : ''
+    def proteins    = proteins              ? "--prot_seq=${proteins}"                  : ''
+    def hints       = hintsfile             ? "--hints=${hintsfile}"                    : ''
     """
     cp -r /usr/share/augustus/config augustus_config
 
@@ -60,15 +60,16 @@ process BRAKER3 {
     """
 
     stub:
-    prefix = task.ext.prefix ?: "${meta.id}"
-    def createHints = (rna_ids || bam || proteins || hints) ? "touch ${prefix}/hintsfile.gff" : ''
+    prefix          = task.ext.prefix                       ?: "${meta.id}"
+    def rna_ids     = rnaseq_sets_ids                       ? "--rnaseq_sets_ids=${rnaseq_sets_ids}"    : ''
+    def touch_hints = (rna_ids || bam || proteins || hints) ? "touch ${prefix}/hintsfile.gff"           : ''
     """
     mkdir "$prefix"
 
     touch "${prefix}/braker.gtf"
     touch "${prefix}/braker.codingseq"
     touch "${prefix}/braker.aa"
-    $createHints
+    $touch_hints
     touch "${prefix}/braker.log"
     touch "${prefix}/what-to-cite.txt"
 
diff --git a/workflows/pangene.nf b/workflows/pangene.nf
index 70ab2cc..4482c05 100644
--- a/workflows/pangene.nf
+++ b/workflows/pangene.nf
@@ -5,7 +5,7 @@ include { PREPROCESS_RNASEQ             } from '../subworkflows/local/preprocess
 include { ALIGN_RNASEQ                  } from '../subworkflows/local/align_rnaseq'
 include { PREPARE_EXT_PROTS             } from '../subworkflows/local/prepare_ext_prots'
 
-// include { BRAKER3                       } from '../modules/kherronism/braker3'
+include { BRAKER3                       } from '../modules/kherronism/braker3'
 
 // include { FASTA_LIFTOFF                 } from '../subworkflows/local/fasta_liftoff'
 
@@ -112,29 +112,29 @@ workflow PANGENE {
     ch_ext_prots_fasta          = PREPARE_EXT_PROTS.out.ext_prots_fasta
     ch_versions                 = ch_versions.mix(PREPARE_EXT_PROTS.out.versions)
 
-    // // MODULE: BRAKER3
-    // ch_braker_inputs            = ch_masked_target_assembly
-    //                             | join(ch_rnaseq_bam, remainder: true)
-    //                             | combine(
-    //                                 ch_ext_prots_fasta.map { meta, filePath -> filePath }.ifEmpty(null)
-    //                             )
-    //                             | map { meta, fasta, bam, prots -> [meta, fasta, bam ?: [], prots ?: []] }
+    // MODULE: BRAKER3
+    ch_braker_inputs            = ch_masked_target_assembly
+                                | join(ch_rnaseq_bam, remainder: true)
+                                | combine(
+                                    ch_ext_prots_fasta.map { meta, filePath -> filePath }.ifEmpty(null)
+                                )
+                                | map { meta, fasta, bam, prots -> [ meta, fasta, bam ?: [], prots ?: [] ] }
     
-    // def rnaseq_sets_dirs        = []
-    // def rnaseq_sets_ids         = []
-    // def hintsfile               = []
-
-    // BRAKER3(
-    //     ch_braker_inputs.map { meta, fasta, bam, prots -> [meta, fasta] },
-    //     ch_braker_inputs.map { meta, fasta, bam, prots -> bam },
-    //     rnaseq_sets_dirs,
-    //     rnaseq_sets_ids,
-    //     ch_braker_inputs.map { meta, fasta, bam, prots -> prots },
-    //     hintsfile
-    // )
+    def rnaseq_sets_dirs        = []
+    def rnaseq_sets_ids         = []
+    def hintsfile               = []
+
+    BRAKER3(
+        ch_braker_inputs.map { meta, fasta, bam, prots -> [meta, fasta] },
+        ch_braker_inputs.map { meta, fasta, bam, prots -> bam },
+        rnaseq_sets_dirs,
+        rnaseq_sets_ids,
+        ch_braker_inputs.map { meta, fasta, bam, prots -> prots },
+        hintsfile
+    )
 
-    // ch_braker_gff3              = BRAKER3.out.gff3
-    // ch_versions                 = ch_versions.mix(BRAKER3.out.versions.first())
+    ch_braker_gff3              = BRAKER3.out.gff3
+    ch_versions                 = ch_versions.mix(BRAKER3.out.versions.first())
 
     // // SUBWORKFLOW: FASTA_LIFTOFF
     // FASTA_LIFTOFF(

From a9f1fc6c91188256aec0359eb26ececb4a3d68b0 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 21 Dec 2023 15:32:48 +1300
Subject: [PATCH 39/59] Updated fastp, sortmerna and liftoff

---
 modules.json                                  |  12 +-
 modules/nf-core/fastp/fastp.diff              |  28 --
 modules/nf-core/fastp/main.nf                 |   2 +-
 modules/nf-core/fastp/tests/main.nf.test      | 241 ++++++++++++++++++
 modules/nf-core/fastp/tests/main.nf.test.snap |  55 ++++
 modules/nf-core/sortmerna/environment.yml     |   2 +-
 modules/nf-core/sortmerna/main.nf             |   6 +-
 modules/nf-core/sortmerna/sortmerna.diff      |  36 ---
 modules/nf-core/sortmerna/tests/main.nf.test  |  97 ++++++-
 .../nf-core/sortmerna/tests/main.nf.test.snap |  46 ++--
 modules/pfr/liftoff/main.nf                   |  19 +-
 modules/pfr/liftoff/meta.yml                  |   8 +-
 modules/pfr/liftoff/tests/main.nf.test        |  54 +++-
 modules/pfr/liftoff/tests/main.nf.test.snap   |  11 +
 14 files changed, 483 insertions(+), 134 deletions(-)
 delete mode 100644 modules/nf-core/fastp/fastp.diff
 delete mode 100644 modules/nf-core/sortmerna/sortmerna.diff

diff --git a/modules.json b/modules.json
index 299e449..633ed49 100644
--- a/modules.json
+++ b/modules.json
@@ -27,7 +27,7 @@
           },
           "liftoff": {
             "branch": "main",
-            "git_sha": "3593ec100c92b656204bf739a51d62fd44d81f6f",
+            "git_sha": "444b35f4e6285115f84d2bfce49fc0e6d8a2754e",
             "installed_by": ["modules"]
           }
         }
@@ -83,9 +83,8 @@
           },
           "fastp": {
             "branch": "master",
-            "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde",
-            "installed_by": ["fastq_fastqc_umitools_fastp"],
-            "patch": "modules/nf-core/fastp/fastp.diff"
+            "git_sha": "d086322563bdbb08c94bf15a7db58a39ccdb1520",
+            "installed_by": ["fastq_fastqc_umitools_fastp"]
           },
           "fastqc": {
             "branch": "master",
@@ -109,9 +108,8 @@
           },
           "sortmerna": {
             "branch": "master",
-            "git_sha": "a20b6b1e9114a08007608528e4a2b0fbbb8a9ca2",
-            "installed_by": ["modules"],
-            "patch": "modules/nf-core/sortmerna/sortmerna.diff"
+            "git_sha": "ce558e30784469b88a16923ca96d81899d240b42",
+            "installed_by": ["modules"]
           },
           "star/align": {
             "branch": "master",
diff --git a/modules/nf-core/fastp/fastp.diff b/modules/nf-core/fastp/fastp.diff
deleted file mode 100644
index 4213043..0000000
--- a/modules/nf-core/fastp/fastp.diff
+++ /dev/null
@@ -1,28 +0,0 @@
-Changes in module 'nf-core/fastp'
---- modules/nf-core/fastp/main.nf
-+++ modules/nf-core/fastp/main.nf
-@@ -99,4 +99,22 @@
-         END_VERSIONS
-         """
-     }
-+    
-+    stub:
-+    def prefix              = task.ext.prefix ?: "${meta.id}"
-+    def is_single_output    = task.ext.args?.contains('--interleaved_in') || meta.single_end
-+    def touch_reads         = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz"
-+    def touch_merged        = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : ""
-+    """
-+    touch $touch_reads
-+    touch "${prefix}.fastp.json"
-+    touch "${prefix}.fastp.html"
-+    touch "${prefix}.fastp.log"
-+    $touch_merged
-+
-+    cat <<-END_VERSIONS > versions.yml
-+    "${task.process}":
-+        fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
-+    END_VERSIONS
-+    """
- }
-
-************************************************************
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
index 1f56640..2a3b679 100644
--- a/modules/nf-core/fastp/main.nf
+++ b/modules/nf-core/fastp/main.nf
@@ -99,7 +99,7 @@ process FASTP {
         END_VERSIONS
         """
     }
-    
+
     stub:
     def prefix              = task.ext.prefix ?: "${meta.id}"
     def is_single_output    = task.ext.args?.contains('--interleaved_in') || meta.single_end
diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test
index f610b73..17dce8a 100644
--- a/modules/nf-core/fastp/tests/main.nf.test
+++ b/modules/nf-core/fastp/tests/main.nf.test
@@ -57,6 +57,67 @@ nextflow_process {
                     { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
                     }
                 },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { file(it[1]).getName() } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_single_end-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("test_fastp_single_end-stub") {
+
+        options '-stub'
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = false
+                save_merged       = false
+
+                input[0] = [
+                    [ id:'test', single_end:true ],
+                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+                ]
+
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+
+            assertAll(
+                { assert process.success },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { file(it[1]).getName() } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_single_end-for_stub_match")
+                },
                 { assert snapshot(process.out.versions).match("versions") }
             )
         }
@@ -127,6 +188,67 @@ nextflow_process {
                     { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
                     }
                 },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_paired_end-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("test_fastp_paired_end-stub") {
+
+        options '-stub'
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = false
+                save_merged       = false
+
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+                ]
+
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_paired_end-for_stub_match")
+                },
                 { assert snapshot(process.out.versions).match("versions") }
             )
         }
@@ -181,6 +303,66 @@ nextflow_process {
                     { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
                     }
                 },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { file(it[1]).getName() } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_interleaved-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("fastp test_fastp_interleaved-stub") {
+
+        options '-stub'
+
+        config './nextflow.config'
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = false
+                save_merged       = false
+
+                input[0] = [ [ id:'test', single_end:true ], // meta map
+                            [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ]
+                        ]
+
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { file(it[1]).getName() } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_interleaved-for_stub_match")
+                },
                 { assert snapshot(process.out.versions).match("versions") }
             )
         }
@@ -399,6 +581,65 @@ nextflow_process {
                     { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
                     }
                 },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_paired_end_merged-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("test_fastp_paired_end_merged-stub") {
+
+        options '-stub'
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = false
+                save_merged       = true
+
+                input[0] = [ [ id:'test', single_end:false ], // meta map
+                             [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                               file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+                           ]
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_paired_end_merged-for_stub_match")
+                },
                 { assert snapshot(process.out.versions).match("versions") }
             )
         }
diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap
index 0fa68c7..1b7d241 100644
--- a/modules/nf-core/fastp/tests/main.nf.test.snap
+++ b/modules/nf-core/fastp/tests/main.nf.test.snap
@@ -1,4 +1,19 @@
 {
+    "test_fastp_paired_end-for_stub_match": {
+        "content": [
+            [
+                [
+                    "test_1.fastp.fastq.gz",
+                    "test_2.fastp.fastq.gz"
+                ],
+                "test.fastp.html",
+                "test.fastp.json",
+                "test.fastp.log",
+                "{id=test, single_end=false}"
+            ]
+        ],
+        "timestamp": "2023-12-21T09:44:37.202512"
+    },
     "fastp test_fastp_interleaved_json": {
         "content": [
             [
@@ -13,6 +28,22 @@
         ],
         "timestamp": "2023-10-17T11:04:45.794175881"
     },
+    "test_fastp_paired_end_merged-for_stub_match": {
+        "content": [
+            [
+                [
+                    "test_1.fastp.fastq.gz",
+                    "test_2.fastp.fastq.gz"
+                ],
+                "test.fastp.html",
+                "test.fastp.json",
+                "test.fastp.log",
+                "test.merged.fastq.gz",
+                "{id=test, single_end=false}"
+            ]
+        ],
+        "timestamp": "2023-12-21T09:53:45.237014"
+    },
     "test_fastp_single_end_json": {
         "content": [
             [
@@ -35,6 +66,30 @@
         ],
         "timestamp": "2023-10-17T11:04:10.582076024"
     },
+    "test_fastp_interleaved-for_stub_match": {
+        "content": [
+            [
+                "test.fastp.fastq.gz",
+                "test.fastp.html",
+                "test.fastp.json",
+                "test.fastp.log",
+                "{id=test, single_end=true}"
+            ]
+        ],
+        "timestamp": "2023-12-21T09:48:43.148485"
+    },
+    "test_fastp_single_end-for_stub_match": {
+        "content": [
+            [
+                "test.fastp.fastq.gz",
+                "test.fastp.html",
+                "test.fastp.json",
+                "test.fastp.log",
+                "{id=test, single_end=true}"
+            ]
+        ],
+        "timestamp": "2023-12-21T09:20:07.254788"
+    },
     "test_fastp_single_end_trim_fail_json": {
         "content": [
             [
diff --git a/modules/nf-core/sortmerna/environment.yml b/modules/nf-core/sortmerna/environment.yml
index 3dae00a..f40f995 100644
--- a/modules/nf-core/sortmerna/environment.yml
+++ b/modules/nf-core/sortmerna/environment.yml
@@ -4,4 +4,4 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - bioconda::sortmerna=4.3.4
+  - bioconda::sortmerna=4.3.6
diff --git a/modules/nf-core/sortmerna/main.nf b/modules/nf-core/sortmerna/main.nf
index 909a7b1..29c640c 100644
--- a/modules/nf-core/sortmerna/main.nf
+++ b/modules/nf-core/sortmerna/main.nf
@@ -1,11 +1,11 @@
 process SORTMERNA {
     tag "$meta.id"
-    label "process_high"
+    label 'process_high'
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/sortmerna:4.3.4--h9ee0642_0' :
-        'biocontainers/sortmerna:4.3.4--h9ee0642_0' }"
+        'https://depot.galaxyproject.org/singularity/sortmerna:4.3.6--h9ee0642_0' :
+        'biocontainers/sortmerna:4.3.6--h9ee0642_0' }"
 
     input:
     tuple val(meta), path(reads)
diff --git a/modules/nf-core/sortmerna/sortmerna.diff b/modules/nf-core/sortmerna/sortmerna.diff
deleted file mode 100644
index 66d58d5..0000000
--- a/modules/nf-core/sortmerna/sortmerna.diff
+++ /dev/null
@@ -1,36 +0,0 @@
-Changes in module 'nf-core/sortmerna'
---- modules/nf-core/sortmerna/main.nf
-+++ modules/nf-core/sortmerna/main.nf
-@@ -67,4 +67,30 @@
-         END_VERSIONS
-         """
-     }
-+
-+    stub:
-+    def args = task.ext.args ?: ''
-+    def prefix = task.ext.prefix ?: "${meta.id}"
-+    if (meta.single_end) {
-+        """
-+        touch ${prefix}.non_rRNA.fastq.gz
-+        touch ${prefix}.sortmerna.log
-+
-+        cat <<-END_VERSIONS > versions.yml
-+        "${task.process}":
-+            sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
-+        END_VERSIONS
-+        """
-+    } else {
-+        """
-+        touch ${prefix}_1.non_rRNA.fastq.gz
-+        touch ${prefix}_2.non_rRNA.fastq.gz
-+        touch ${prefix}.sortmerna.log
-+
-+        cat <<-END_VERSIONS > versions.yml
-+        "${task.process}":
-+            sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
-+        END_VERSIONS
-+        """
-+    }
- }
-
-************************************************************
diff --git a/modules/nf-core/sortmerna/tests/main.nf.test b/modules/nf-core/sortmerna/tests/main.nf.test
index 3ec2692..8a01e2a 100644
--- a/modules/nf-core/sortmerna/tests/main.nf.test
+++ b/modules/nf-core/sortmerna/tests/main.nf.test
@@ -23,9 +23,51 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out.reads).match("se_reads") },
-                { assert process.out.log },
-                { assert snapshot(process.out.versions).match("se_versions") }
+                { assert process.out.reads },
+                { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 100 (100.00)") },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("sarscov2 single_end-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("sarscov2 single_end stub") {
+
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test', single_end:true ], // meta map
+                             [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+                           ]
+                input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("sarscov2 single_end-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
             )
         }
 
@@ -48,9 +90,52 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out.reads).match("pe_reads") },
-                { assert process.out.log },
-                { assert snapshot(process.out.versions).match("pe_versions") }
+                { assert process.out.reads },
+                { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 200 (100.00)") },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+                            process.out.log.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("sarscov2 paired_end-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("sarscov2 paired_end stub") {
+
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test', single_end:false ], // meta map
+                             [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                               file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+                           ]
+                input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+                            process.out.log.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("sarscov2 paired_end-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
             )
         }
 
diff --git a/modules/nf-core/sortmerna/tests/main.nf.test.snap b/modules/nf-core/sortmerna/tests/main.nf.test.snap
index f1bedb7..e502000 100644
--- a/modules/nf-core/sortmerna/tests/main.nf.test.snap
+++ b/modules/nf-core/sortmerna/tests/main.nf.test.snap
@@ -1,49 +1,33 @@
 {
-    "se_versions": {
+    "sarscov2 single_end-for_stub_match": {
         "content": [
             [
-                "versions.yml:md5,96553a18cad5237fbf76d5a6c966360e"
+                "test.non_rRNA.fastq.gz",
+                "test.sortmerna.log",
+                "{id=test, single_end=true}"
             ]
         ],
-        "timestamp": "2023-11-22T14:25:07.95908694"
+        "timestamp": "2023-12-21T11:56:00.15356"
     },
-    "pe_reads": {
+    "versions": {
         "content": [
             [
-                [
-                    {
-                        "id": "test",
-                        "single_end": false
-                    },
-                    [
-                        "test_1.non_rRNA.fastq.gz:md5,e62ff0123a74adfc6903d59a449cbdb0",
-                        "test_2.non_rRNA.fastq.gz:md5,e62ff0123a74adfc6903d59a449cbdb0"
-                    ]
-                ]
+                "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b"
             ]
         ],
-        "timestamp": "2023-11-22T14:25:19.098771475"
+        "timestamp": "2023-12-21T11:56:00.200244"
     },
-    "se_reads": {
+    "sarscov2 paired_end-for_stub_match": {
         "content": [
             [
                 [
-                    {
-                        "id": "test",
-                        "single_end": true
-                    },
-                    "test.non_rRNA.fastq.gz:md5,e62ff0123a74adfc6903d59a449cbdb0"
-                ]
-            ]
-        ],
-        "timestamp": "2023-11-22T14:25:07.949212892"
-    },
-    "pe_versions": {
-        "content": [
-            [
-                "versions.yml:md5,96553a18cad5237fbf76d5a6c966360e"
+                    "test_1.non_rRNA.fastq.gz",
+                    "test_2.non_rRNA.fastq.gz"
+                ],
+                "test.sortmerna.log",
+                "{id=test, single_end=false}"
             ]
         ],
-        "timestamp": "2023-11-22T14:25:19.105098985"
+        "timestamp": "2023-12-21T12:00:47.879193"
     }
 }
\ No newline at end of file
diff --git a/modules/pfr/liftoff/main.nf b/modules/pfr/liftoff/main.nf
index a382dab..317eca1 100644
--- a/modules/pfr/liftoff/main.nf
+++ b/modules/pfr/liftoff/main.nf
@@ -9,11 +9,11 @@ process LIFTOFF {
 
     input:
     tuple val(meta), path(target_fa)
-    path ref_fa, name: 'liftoff_reference_assembly.fa' // To avoid name collisions betwen target_fa and ref_fa
+    path ref_fa, name: 'ref_assembly.fa'
     path ref_annotation
 
     output:
-    tuple val(meta), path("${prefix}.gff3")     , emit: gff3    // To avoid pattern collision with '*.polished.gff3'
+    tuple val(meta), path("${prefix}.gff3")     , emit: gff3
     tuple val(meta), path("*.polished.gff3")    , emit: polished_gff3, optional: true
     tuple val(meta), path("*.unmapped.txt")     , emit: unmapped_txt
     path "versions.yml"                         , emit: versions
@@ -22,8 +22,8 @@ process LIFTOFF {
     task.ext.when == null || task.ext.when
 
     script:
-    def args    = task.ext.args ?: ''
-    prefix      = task.ext.prefix ?: "${meta.id}"
+    def args    = task.ext.args     ?: ''
+    prefix      = task.ext.prefix   ?: "${meta.id}"
     """
     liftoff \\
         -g $ref_annotation \\
@@ -32,9 +32,11 @@ process LIFTOFF {
         -u "${prefix}.unmapped.txt" \\
         $args \\
         $target_fa \\
-        liftoff_reference_assembly.fa
+        ref_assembly.fa
 
-    mv "${prefix}.gff3_polished" "${prefix}.polished.gff3" \\
+    mv \\
+        "${prefix}.gff3_polished" \\
+        "${prefix}.polished.gff3" \\
         || echo "-polish is absent"
 
     cat <<-END_VERSIONS > versions.yml
@@ -44,10 +46,13 @@ process LIFTOFF {
     """
 
     stub:
-    prefix = task.ext.prefix ?: "${meta.id}"
+    def args            = task.ext.args     ?: ''
+    prefix              = task.ext.prefix   ?: "${meta.id}"
+    def touch_polished  = args.contains('-polish') ? "touch ${prefix}.polished.gff3" : ''
     """
     touch "${prefix}.gff3"
     touch "${prefix}.unmapped.txt"
+    $touch_polished
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/pfr/liftoff/meta.yml b/modules/pfr/liftoff/meta.yml
index ad1c5b8..46b3c58 100644
--- a/modules/pfr/liftoff/meta.yml
+++ b/modules/pfr/liftoff/meta.yml
@@ -1,7 +1,9 @@
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "liftoff"
-description: "Liftoff is a tool that accurately maps annotations in GFF or GTF between assemblies of the same, or closely-related species."
+description: |
+  Uses Liftoff to accurately map annotations in GFF or GTF between assemblies of the same,
+  or closely-related species
 keywords:
   - genome
   - annotation
@@ -10,7 +12,9 @@ keywords:
   - liftover
 tools:
   - "liftoff":
-      description: "Liftoff is a tool that accurately maps annotations in GFF or GTF between assemblies of the same, or closely-related species."
+      description: |
+        Liftoff is a tool that accurately maps annotations in GFF or GTF between assemblies of the same,
+        or closely-related species
       homepage: "https://github.com/agshumate/Liftoff"
       documentation: "https://github.com/agshumate/Liftoff"
       tool_dev_url: "https://github.com/agshumate/Liftoff"
diff --git a/modules/pfr/liftoff/tests/main.nf.test b/modules/pfr/liftoff/tests/main.nf.test
index 00d1d2a..272c882 100644
--- a/modules/pfr/liftoff/tests/main.nf.test
+++ b/modules/pfr/liftoff/tests/main.nf.test
@@ -7,6 +7,7 @@ nextflow_process {
 
     tag "modules"
     tag "modules_nfcore"
+    tag "nf-core/gunzip"
     tag "liftoff"
 
     test("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf") {
@@ -45,16 +46,41 @@ nextflow_process {
             assertAll(
                 { assert process.success },
                 { assert snapshot(process.out.unmapped_txt).match("unmapped_txt") },
-                { assert snapshot(process.out.versions).match("versions") },
-                { assert path(process.out.gff3.get(0).get(1)).getText().contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") },
-                { assert path(process.out.polished_gff3.get(0).get(1)).getText().contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") }
+                { assert file(process.out.gff3[0][1]).text.contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") },
+                { assert file(process.out.polished_gff3[0][1]).text.contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.gff3[0][0].toString()] + // meta
+                            process.out.gff3.collect { file(it[1]).getName() } +
+                            process.out.polished_gff3.collect { file(it[1]).getName() } +
+                            process.out.unmapped_txt.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
             )
         }
 
     }
 
-    test("stub") {
-        options "-stub"
+    test("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-stub") {
+        options '-stub'
+
+        setup {
+            run("GUNZIP") {
+                script "../../../nf-core/gunzip"
+
+                process {
+                    """
+                    input[0] = [
+                        [ id:'test' ],
+                        file(params.test_data['homo_sapiens']['genome']['genome_1_fasta'], checkIfExists: true)
+                    ]
+                    """
+                }
+            }
+        }
 
         when {
             process {
@@ -63,9 +89,7 @@ nextflow_process {
                     [ id:'test' ],
                     file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
                 ]
-                input[1] = [
-                    file(params.test_data['homo_sapiens']['genome']['genome_1_fasta'], checkIfExists: true)
-                ]
+                input[1] = GUNZIP.out.gunzip.map { meta, file -> file }
                 input[2] = [
                     file(params.test_data['homo_sapiens']['genome']['genome_1_gtf'], checkIfExists: true)
                 ]
@@ -76,10 +100,16 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert process.out.gff3 != null },
-                { assert process.out.polished_gff3 == [] },
-                { assert process.out.unmapped_txt != null },
-                { assert process.out.versions != null },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.gff3[0][0].toString()] + // meta
+                            process.out.gff3.collect { file(it[1]).getName() } +
+                            process.out.polished_gff3.collect { file(it[1]).getName() } +
+                            process.out.unmapped_txt.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-for_stub_match")
+                },
                 { assert snapshot(process.out.versions).match("versions") }
             )
         }
diff --git a/modules/pfr/liftoff/tests/main.nf.test.snap b/modules/pfr/liftoff/tests/main.nf.test.snap
index 36c39b6..baa4d70 100644
--- a/modules/pfr/liftoff/tests/main.nf.test.snap
+++ b/modules/pfr/liftoff/tests/main.nf.test.snap
@@ -19,5 +19,16 @@
             ]
         ],
         "timestamp": "2023-12-01T13:57:40.752414"
+    },
+    "homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-for_stub_match": {
+        "content": [
+            [
+                "test.gff3",
+                "test.polished.gff3",
+                "test.unmapped.txt",
+                "{id=test}"
+            ]
+        ],
+        "timestamp": "2023-12-21T15:20:04.816416"
     }
 }
\ No newline at end of file

From d0faf8d68b3679602c11f4102c1a463b9fd9cb02 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 21 Dec 2023 16:21:51 +1300
Subject: [PATCH 40/59] Cleaned up fasta_liftoff

---
 conf/base.config                    |   7 +-
 nextflow.config                     |   8 +-
 subworkflows/local/fasta_liftoff.nf | 137 +++++++++++++++-------------
 workflows/pangene.nf                |  54 +++++------
 4 files changed, 106 insertions(+), 100 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 54114d3..4467c0b 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -66,10 +66,9 @@ process {
     withLabel:process_high_memory {
         memory              = { check_max( 200.GB * task.attempt, 'memory' ) }
     }
-    
-    // Custom
-    withLabel:process_week_long {
-        time                = { check_max( 7.days  * task.attempt, 'time'  ) }
+
+    withName:CUSTOM_DUMPSOFTWAREVERSIONS {
+        cache = false
     }
 }
 
diff --git a/nextflow.config b/nextflow.config
index ef139b4..14544a8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -54,12 +54,12 @@ params {
 
     liftoff_xref_annotations    = [
         [
-            "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/Russell_V2a.chromosomes.and.unassiged.and.haplotigs.fsa",
-            "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.gff3"
+            ".test/liftoff/Russell_V2a.chromosomes.and.unassiged.and.haplotigs.fsa",
+            ".test/liftoff/RU01.20221115150135.gff3"
         ],
         [
-            "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_chr_all.fas",
-            "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_GFF3_genes_transposons.fixed.gff3"
+            ".test/liftoff/TAIR10_chr_all.fas",
+            ".test/liftoff/TAIR10_GFF3_genes_transposons.fixed.gff3"
         ]
     ]
     // Format:      [ [ fasta(.gz), gff3(.gz) ] ]
diff --git a/subworkflows/local/fasta_liftoff.nf b/subworkflows/local/fasta_liftoff.nf
index 5e6fd22..8952e1d 100644
--- a/subworkflows/local/fasta_liftoff.nf
+++ b/subworkflows/local/fasta_liftoff.nf
@@ -1,85 +1,96 @@
 include { GUNZIP as GUNZIP_FASTA    } from '../../modules/nf-core/gunzip'
 include { GUNZIP as GUNZIP_GFF      } from '../../modules/nf-core/gunzip'
 include { GFFREAD                   } from '../../modules/nf-core/gffread'
-include { LIFTOFF                   } from '../../modules/local/liftoff'
+include { LIFTOFF                   } from '../../modules/pfr/liftoff'
 
 workflow FASTA_LIFTOFF {
     take:
-    target_assemby              // Channel: [ meta, fasta ]
-    xref_annotations_fasta      // Channel: [ meta2, fasta ]
-    xref_annotations_gff        // Channel: [ meta2, gff3 ]
+    target_assemby                  // Channel: [ meta, fasta ]
+    xref_fasta                      // Channel: [ meta2, fasta ]
+    xref_gff                        // Channel: [ meta2, gff3 ]
     
     main:
-    // MODULE: GUNZIP_FASTA
-    xref_annotations_fasta
-    | branch { meta, file ->
-        gz: "$file".endsWith(".gz")
-        rest: !"$file".endsWith(".gz")
-    }
-    | set { xref_annotations_fasta_branch }
+    ch_versions                     = Channel.empty()
 
-    GUNZIP_FASTA(
-        xref_annotations_fasta_branch.gz
-    )
-    .gunzip
-    | mix(
-        xref_annotations_fasta_branch.rest
-    )
-    | set { ch_xref_annotations_gunzip_fasta }
+    // MODULE: GUNZIP as GUNZIP_FASTA
+    ch_xref_fasta_branch            = xref_fasta
+                                    | branch { meta, file ->
+                                        gz: "$file".endsWith(".gz")
+                                        rest: !"$file".endsWith(".gz")
+                                    }
+
+    GUNZIP_FASTA ( ch_xref_fasta_branch.gz )
+    
+    ch_xref_gunzip_fasta            = GUNZIP_FASTA.out.gunzip
+                                    | mix(
+                                        ch_xref_fasta_branch.rest
+                                    )
+    
+    ch_versions                     = ch_versions.mix(GUNZIP_FASTA.out.versions.first())
 
-    // MODULE: GUNZIP_GFF
-    xref_annotations_gff
-    | branch { meta, file ->
-        gz: "$file".endsWith(".gz")
-        rest: !"$file".endsWith(".gz")
-    }
-    | set { xref_annotations_gff_branch }
+    // MODULE: GUNZIP as GUNZIP_GFF
+    ch_xref_gff_branch              = xref_gff
+                                    | branch { meta, file ->
+                                        gz: "$file".endsWith(".gz")
+                                        rest: !"$file".endsWith(".gz")
+                                    }
 
-    GUNZIP_GFF(
-        xref_annotations_gff_branch.gz
-    )
-    .gunzip
-    | mix(
-        xref_annotations_gff_branch.rest
-    )
-    | set { ch_xref_annotations_gunzip_gff }
+    GUNZIP_GFF ( ch_xref_gff_branch.gz )
+
+    ch_xref_gunzip_gff              = GUNZIP_GFF.out.gunzip
+                                    | mix(
+                                        ch_xref_gff_branch.rest
+                                    )
+
+    ch_versions                     = ch_versions.mix(GUNZIP_GFF.out.versions.first())
 
     // MODULE: GFFREAD
-    GFFREAD(
-        ch_xref_annotations_gunzip_gff
-    )
-    .gff
-    | set { ch_gffread_gff }
+    ch_gffread_inputs               = ch_xref_gunzip_gff
+                                    | map { meta, gff ->
+                                        [ gff.getSimpleName(), meta, gff ]
+                                    } // For meta insertion later, remove when GFFREAD has meta
+
+    GFFREAD ( ch_gffread_inputs.map { name, meta, gff -> gff } )
+
+    ch_gffread_gff                  = GFFREAD.out.gffread_gff
+                                    | map { gff -> [ gff.getSimpleName(), gff ] }
+                                    | join(ch_gffread_inputs)
+                                    | map { fid, gffread_gff, meta, gff -> [ meta, gffread_gff ] }
+                                    // meta insertion
+    
+    ch_versions                     = ch_versions.mix(GFFREAD.out.versions.first())
 
     // MODULE: LIFTOFF
-    target_assemby
-    | combine(
-        ch_xref_annotations_gunzip_fasta
-        | join(
-            ch_gffread_gff
-        )
-    )
-    | map { meta, targetFasta, refMeta, refFasta, refGFF  ->
-        [[id:"${meta.id}.from.${refMeta.id}", target_assemby: meta.id], targetFasta, refFasta, refGFF]
-    }
-    | set { ch_liftoff_inputs }
+    ch_liftoff_inputs               = target_assemby
+                                    | combine(
+                                        ch_xref_gunzip_fasta
+                                        | join(
+                                            ch_gffread_gff
+                                        )
+                                    )
+                                    | map { meta, target_fa, ref_meta, ref_fa, ref_gff ->
+                                        [
+                                            [
+                                                id: "${meta.id}.from.${ref_meta.id}",
+                                                target_assemby: meta.id
+                                            ],
+                                            target_fa,
+                                            ref_fa,
+                                            ref_gff
+                                        ]
+                                    }
 
     LIFTOFF(
-        ch_liftoff_inputs.map { meta, targetFasta, refFasta, refGFF -> [meta, targetFasta] },
-        ch_liftoff_inputs.map { meta, targetFasta, refFasta, refGFF -> refFasta },
-        ch_liftoff_inputs.map { meta, targetFasta, refFasta, refGFF -> refGFF }
+        ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> [ meta, target_fa ] },
+        ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> ref_fa },
+        ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> ref_gff }
     )
-    .polished_gff3
-    | map { meta, gff -> [[id: meta.target_assemby], gff] }
-    | groupTuple
-    | set { ch_liftoff_gff3 }
 
-    Channel.empty()
-    | mix(GUNZIP_FASTA.out.versions.first())
-    | mix(GUNZIP_GFF.out.versions.first())
-    | mix(GFFREAD.out.versions.first())
-    | mix(LIFTOFF.out.versions.first())
-    | set { ch_versions }
+    ch_liftoff_gff3                 = LIFTOFF.out.polished_gff3
+                                    | map { meta, gff -> [ [ id: meta.target_assemby ], gff ] }
+                                    | groupTuple
+    
+    ch_versions                     = ch_versions.mix(LIFTOFF.out.versions.first())
 
     emit:
     gff3        = ch_liftoff_gff3               // [ meta, [ gff3 ] ]
diff --git a/workflows/pangene.nf b/workflows/pangene.nf
index 4482c05..7cc8c4a 100644
--- a/workflows/pangene.nf
+++ b/workflows/pangene.nf
@@ -1,15 +1,11 @@
 include { validateParams                } from '../modules/local/validate_params'
-
 include { PREPARE_ASSEMBLY              } from '../subworkflows/local/prepare_assembly'
 include { PREPROCESS_RNASEQ             } from '../subworkflows/local/preprocess_rnaseq'
 include { ALIGN_RNASEQ                  } from '../subworkflows/local/align_rnaseq'
 include { PREPARE_EXT_PROTS             } from '../subworkflows/local/prepare_ext_prots'
-
 include { BRAKER3                       } from '../modules/kherronism/braker3'
-
-// include { FASTA_LIFTOFF                 } from '../subworkflows/local/fasta_liftoff'
-
-// include { CUSTOM_DUMPSOFTWAREVERSIONS   } from '../modules/nf-core/custom/dumpsoftwareversions'
+include { FASTA_LIFTOFF                 } from '../subworkflows/local/fasta_liftoff'
+include { CUSTOM_DUMPSOFTWAREVERSIONS   } from '../modules/nf-core/custom/dumpsoftwareversions'
 
 validateParams(params)
 
@@ -50,22 +46,22 @@ workflow PANGENE {
                                 ? Channel.fromList(params.external_protein_fastas)
                                 | map { filePath ->
                                     def fileHandle = file(filePath, checkIfExists: true)
-                                    [ [id: fileHandle.getSimpleName() ], fileHandle]
+                                    [ [ id: fileHandle.getSimpleName() ], fileHandle]
                                 }
                                 : Channel.empty()
     
-    // ch_xref_annotations_mm      = params.liftoff_xref_annotations
-    //                             ? Channel.fromList(params.liftoff_xref_annotations)
-    //                             | multiMap { fasta, gff ->
-    //                                 def fastaFile = file(fasta, checkIfExists:true)
+    ch_xref_mm                  = params.liftoff_xref_annotations
+                                ? Channel.fromList(params.liftoff_xref_annotations)
+                                | multiMap { fasta, gff ->
+                                    def fastaFile = file(fasta, checkIfExists:true)
 
-    //                                 fasta: [[id:fastaFile.getSimpleName()], fastaFile]
-    //                                 gff: [[id:fastaFile.getSimpleName()], file(gff, checkIfExists:true)]
-    //                             }
-    //                             : Channel.empty()
+                                    fasta: [ [ id: fastaFile.getSimpleName() ], fastaFile ]
+                                    gff: [ [ id: fastaFile.getSimpleName() ], file(gff, checkIfExists:true) ]
+                                }
+                                : Channel.empty()
 
-    // ch_xref_annotations_fasta   = ch_xref_annotations_mm.fasta
-    // ch_xref_annotations_gff     = ch_xref_annotations_mm.gff
+    ch_xref_fasta               = params.liftoff_xref_annotations ? ch_xref_mm.fasta : Channel.empty() // a plain empty channel has no named multiMap outputs
+    ch_xref_gff                 = params.liftoff_xref_annotations ? ch_xref_mm.gff   : Channel.empty() // same guard as ch_xref_fasta
 
     // SUBWORKFLOW: PREPARE_ASSEMBLY
     PREPARE_ASSEMBLY(
@@ -136,18 +132,18 @@ workflow PANGENE {
     ch_braker_gff3              = BRAKER3.out.gff3
     ch_versions                 = ch_versions.mix(BRAKER3.out.versions.first())
 
-    // // SUBWORKFLOW: FASTA_LIFTOFF
-    // FASTA_LIFTOFF(
-    //     ch_valid_target_assembly,
-    //     ch_xref_annotations_fasta,
-    //     ch_xref_annotations_gff
-    // )
+    // SUBWORKFLOW: FASTA_LIFTOFF
+    FASTA_LIFTOFF(
+        ch_valid_target_assembly,
+        ch_xref_fasta,
+        ch_xref_gff
+    )
 
-    // ch_liftoff_gff3             = FASTA_LIFTOFF.out.gff3
-    // ch_versions                 = ch_versions.mix(FASTA_LIFTOFF.out.versions)
+    ch_liftoff_gff3             = FASTA_LIFTOFF.out.gff3
+    ch_versions                 = ch_versions.mix(FASTA_LIFTOFF.out.versions)
 
-    // // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS
-    // CUSTOM_DUMPSOFTWAREVERSIONS (
-    //     ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    // )
+    // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS
+    CUSTOM_DUMPSOFTWAREVERSIONS (
+        ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    )
 }
\ No newline at end of file

From 27a1293e458bc3c5e3dcc0c1cd06c9751e78b25a Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Mon, 8 Jan 2024 09:55:12 +1300
Subject: [PATCH 41/59] Updated fasta_edta_lai

---
 modules.json                                  |  4 ++--
 modules/pfr/edta/edta/main.nf                 |  2 +-
 modules/pfr/edta/edta/meta.yml                | 21 +++++++++++++++++++
 modules/pfr/edta/edta/tests/main.nf.test      | 21 ++++++++++++-------
 modules/pfr/edta/edta/tests/nextflow.config   |  2 +-
 .../pfr/fasta_edta_lai/tests/main.nf.test     | 20 ++++++++++++++----
 .../fasta_edta_lai/tests/main.nf.test.snap    | 11 ++++++++++
 7 files changed, 65 insertions(+), 16 deletions(-)
 create mode 100644 subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap

diff --git a/modules.json b/modules.json
index 633ed49..f40e823 100644
--- a/modules.json
+++ b/modules.json
@@ -17,7 +17,7 @@
           },
           "edta/edta": {
             "branch": "main",
-            "git_sha": "d14b1f1d790cc01c11f8ec5aa80a9562c6808b20",
+            "git_sha": "35468dbb1f35eb17a43d7e05544601c7c3f8cd90",
             "installed_by": ["fasta_edta_lai", "modules"]
           },
           "lai": {
@@ -36,7 +36,7 @@
         "pfr": {
           "fasta_edta_lai": {
             "branch": "main",
-            "git_sha": "fde2f37998ba54ec9c0b5cf65a2f28f14af981b0",
+            "git_sha": "5ae026a98da1331433fa4cf5b667c9abdf43e395",
             "installed_by": ["subworkflows"]
           }
         }
diff --git a/modules/pfr/edta/edta/main.nf b/modules/pfr/edta/edta/main.nf
index 458f525..a81c528 100644
--- a/modules/pfr/edta/edta/main.nf
+++ b/modules/pfr/edta/edta/main.nf
@@ -42,7 +42,7 @@ process EDTA_EDTA {
         $rmout_file \\
         $exclude_file \\
         $args \\
-        &> "${prefix}.log"
+        &> >(tee "${prefix}.log" 2>&1)
 
     mv \\
         "${mod_file_name}.EDTA.TElib.fa" \\
diff --git a/modules/pfr/edta/edta/meta.yml b/modules/pfr/edta/edta/meta.yml
index 4d59fdf..52503b8 100644
--- a/modules/pfr/edta/edta/meta.yml
+++ b/modules/pfr/edta/edta/meta.yml
@@ -25,6 +25,27 @@ input:
       type: file
       description: Genome fasta file
       pattern: "*.{fsa,fa,fasta}"
+  - cds:
+      type: file
+      description: |
+        A FASTA file containing the coding sequence (no introns, UTRs, nor TEs)
+        of this genome or its close relative
+      pattern: "*.{fsa,fa,fasta}"
+  - curatedlib:
+      type: file
+      description: |
+        A curated library to keep consistent naming and classification for known TEs
+      pattern: "*.liban"
+  - rmout:
+      type: file
+      description: |
+        Homology-based TE annotation instead of using the EDTA library for masking in
+        RepeatMasker .out format
+      pattern: "*.out"
+  - exclude:
+      type: file
+      description: Exclude regions (bed format) from TE masking in the MAKER.masked output
+      pattern: "*.bed"
 output:
   - meta:
       type: map
diff --git a/modules/pfr/edta/edta/tests/main.nf.test b/modules/pfr/edta/edta/tests/main.nf.test
index d0a7142..3aed0a2 100644
--- a/modules/pfr/edta/edta/tests/main.nf.test
+++ b/modules/pfr/edta/edta/tests/main.nf.test
@@ -15,14 +15,19 @@ nextflow_process {
         when {
             process {
                 """
-                input[0] = [
-                    [ id:'test' ], // meta map
-                    file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
-                ]
-                input[1] = []
-                input[2] = []
-                input[3] = []
-                input[4] = []
+                input[0]    = Channel.of(file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true))
+                            | map { f ->
+                                (
+                                    ['>Chr21'] +
+                                    f.readLines().subList(66666.toInteger(), 116666.toInteger()) // 4 MB to 7 MB; 60 bases per line
+                                ).join('\\n')
+                            }
+                            | collectFile(name: 'genome_3_to_10_mb.fasta')
+                            | map { f -> [ [ id: 'test'], f ] }
+                input[1]    = []
+                input[2]    = []
+                input[3]    = []
+                input[4]    = []
                 """
             }
         }
diff --git a/modules/pfr/edta/edta/tests/nextflow.config b/modules/pfr/edta/edta/tests/nextflow.config
index b20ca5e..e58e10e 100644
--- a/modules/pfr/edta/edta/tests/nextflow.config
+++ b/modules/pfr/edta/edta/tests/nextflow.config
@@ -1,3 +1,3 @@
 process {
-    ext.args = '--anno 1 --evaluate 1'
+    ext.args = '--anno 1'
 }
diff --git a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
index a4fa87b..e852a70 100644
--- a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
+++ b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
@@ -15,13 +15,25 @@ nextflow_workflow {
 
     test("test_data") {
 
+        setup {
+            run("GUNZIP") {
+                script "../../../../modules/nf-core/gunzip"
+
+                process {
+                    """
+                    input[0] = [
+                        [ id:'test' ],
+                        file('/Users/hrauxr/Projects/nxf-modules/tests/data/genome.fasta.gz', checkIfExists: true)
+                    ]
+                    """
+                }
+            }
+        }
+
         when {
             workflow {
                 """
-                input[0] = Channel.of([
-                    [ id:'test' ],
-                    file("/Users/hrauxr/Projects/nxf-modules/data/chr1.fa", checkIfExists: true)
-                ])
+                input[0] = GUNZIP.out.gunzip
                 input[1] = []
                 input[2] = false
                 """
diff --git a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap
new file mode 100644
index 0000000..574acc9
--- /dev/null
+++ b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap
@@ -0,0 +1,11 @@
+{
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,0d4bc49e94acb8995ca552d4e666e3ce",
+                "versions.yml:md5,754bb19f86be761d90c002a0af2faf1c"
+            ]
+        ],
+        "timestamp": "2023-12-22T14:09:24.171934"
+    }
+}
\ No newline at end of file

From 3867ed5e1a60646182afb14f7d00d1fe24005f8b Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Mon, 8 Jan 2024 10:17:31 +1300
Subject: [PATCH 42/59] Added script for local stub run

---
 pangene_local_stub.sh | 9 +++++++++
 pangene_pfr.sh        | 5 ++++-
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100755 pangene_local_stub.sh

diff --git a/pangene_local_stub.sh b/pangene_local_stub.sh
new file mode 100755
index 0000000..e8227d5
--- /dev/null
+++ b/pangene_local_stub.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+nextflow \
+    main.nf \
+    -profile local,docker \
+    -resume \
+    -stub \
+    --max_cpus=1 \
+    --max_memory=1.GB
\ No newline at end of file
diff --git a/pangene_pfr.sh b/pangene_pfr.sh
index ab3d262..ca1a335 100644
--- a/pangene_pfr.sh
+++ b/pangene_pfr.sh
@@ -16,4 +16,7 @@ ml nextflow/23.04.4
 export TMPDIR="/workspace/$USER/tmp"
 export APPTAINER_BINDPATH="$APPTAINER_BINDPATH,$TMPDIR:$TMPDIR,$TMPDIR:/tmp"
 
-nextflow main.nf -profile pfr,apptainer -resume
\ No newline at end of file
+nextflow \
+    main.nf \
+    -profile pfr,apptainer \
+    -resume
\ No newline at end of file

From 476149154cec16229c903624ccc8197b9a4e20c6 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Mon, 8 Jan 2024 11:26:41 +1300
Subject: [PATCH 43/59] Samplesheet now accepts relative paths

---
 nextflow.config                       |  2 +-
 pangene_local_stub.sh                 |  3 ++-
 subworkflows/local/extract_samples.nf | 25 +++++++++++++++++--------
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 14544a8..d2a0651 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -25,7 +25,7 @@ params {
     
     repeatmasker_save_outputs   = true
     
-    samplesheet                 = "./.test/samplesheet_small.csv"
+    samplesheet                 = "./.test/samplesheet.csv"
     // Optional: Set to null if not available
 
     skip_fastqc                 = false
diff --git a/pangene_local_stub.sh b/pangene_local_stub.sh
index e8227d5..0a18650 100755
--- a/pangene_local_stub.sh
+++ b/pangene_local_stub.sh
@@ -6,4 +6,5 @@ nextflow \
     -resume \
     -stub \
     --max_cpus=1 \
-    --max_memory=1.GB
\ No newline at end of file
+    --max_memory=1.GB \
+    --samplesheet="./.test/samplesheet_small.csv"
\ No newline at end of file
diff --git a/subworkflows/local/extract_samples.nf b/subworkflows/local/extract_samples.nf
index e63bbe0..957f218 100644
--- a/subworkflows/local/extract_samples.nf
+++ b/subworkflows/local/extract_samples.nf
@@ -22,7 +22,10 @@ workflow EXTRACT_SAMPLES {
     SAMPLESHEET_CHECK ( samplesheet, permissible_target_assemblies )
     .csv
     | splitCsv ( header:true, sep:',' )
-    | map { create_fastq_channel(it) }
+    | combine ( samplesheet )
+    | map { row, sheet ->
+        create_fastq_channel(row, sheet.getParent())
+    }
     | set { ch_reads }
 
     reads = ch_reads.map { meta, fastq -> [[id:meta.id, single_end:meta.single_end], fastq]}
@@ -40,25 +43,31 @@ workflow EXTRACT_SAMPLES {
 }
 
 // Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
-def create_fastq_channel(LinkedHashMap row) {
+def create_fastq_channel(LinkedHashMap row, sheetPath) {
     // create meta map
     def meta = [:]
     meta.id                 = row.sample
     meta.single_end         = row.single_end.toBoolean()
     meta.target_assemblies  = row.target_assemblies.split(";").sort()
 
+    def fq1                 = row.fastq_1.startsWith("/") ? row.fastq_1 : "$sheetPath/${row.fastq_1}"
+    def fq2                 = row.fastq_2.startsWith("/") ? row.fastq_2 : "$sheetPath/${row.fastq_2}"
+
     // add path(s) of the fastq file(s) to the meta map
     def fastq_meta = []
-    if (!file(row.fastq_1).exists()) {
-        exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}"
+    if (!file(fq1).exists()) {
+        exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${fq1}"
     }
     if (meta.single_end) {
-        fastq_meta = [ meta, [ file(row.fastq_1) ] ]
+        fastq_meta = [ meta, [ file(fq1) ] ]
     } else {
-        if (!file(row.fastq_2).exists()) {
-            exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
+        if (!file(fq2).exists()) {
+            exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${fq2}"
         }
-        fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
+        fastq_meta = [ meta, [ file(fq1), file(fq2) ] ]
     }
+
+    println fastq_meta
+
     return fastq_meta
 }
\ No newline at end of file

From 8ced02787818d8da7c3e6e7b158da58512137138 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Mon, 8 Jan 2024 11:34:25 +1300
Subject: [PATCH 44/59] Updated modules

---
 modules.json                                  |   4 +-
 .../dumpsoftwareversions/tests/main.nf.test   |   7 +-
 .../tests/main.nf.test.snap                   |  50 ++--
 modules/nf-core/fastqc/tests/main.nf.test     | 271 ++++++++++++------
 .../nf-core/fastqc/tests/main.nf.test.snap    |  12 +-
 5 files changed, 238 insertions(+), 106 deletions(-)

diff --git a/modules.json b/modules.json
index f40e823..cdb8a24 100644
--- a/modules.json
+++ b/modules.json
@@ -73,7 +73,7 @@
           },
           "custom/dumpsoftwareversions": {
             "branch": "master",
-            "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
+            "git_sha": "37dee863936732fe7e05dc598bf6e183a8e7ef73",
             "installed_by": ["modules"]
           },
           "fastavalidator": {
@@ -88,7 +88,7 @@
           },
           "fastqc": {
             "branch": "master",
-            "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
+            "git_sha": "617777a807a1770f73deb38c80004bac06807eef",
             "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
           },
           "gffread": {
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
index eec1db1..b1e1630 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
+++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
@@ -31,7 +31,12 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out).match() }
+                { assert snapshot(
+                    process.out.versions,
+                    file(process.out.mqc_yml[0]).readLines()[0..10],
+                    file(process.out.yml[0]).readLines()[0..7]
+                    ).match()
+                }
             )
         }
     }
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
index 4274ed5..29e7244 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
@@ -1,27 +1,33 @@
 {
     "Should run without failures": {
         "content": [
-            {
-                "0": [
-                    "software_versions.yml:md5,1c851188476409cda5752ce971b20b58"
-                ],
-                "1": [
-                    "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d"
-                ],
-                "2": [
-                    "versions.yml:md5,3843ac526e762117eedf8825b40683df"
-                ],
-                "mqc_yml": [
-                    "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d"
-                ],
-                "versions": [
-                    "versions.yml:md5,3843ac526e762117eedf8825b40683df"
-                ],
-                "yml": [
-                    "software_versions.yml:md5,1c851188476409cda5752ce971b20b58"
-                ]
-            }
+            [
+                "versions.yml:md5,3843ac526e762117eedf8825b40683df"
+            ],
+            [
+                "data: \"<style>\\n#nf-core-versions tbody:nth-child(even) {\\n    background-color: #f2f2f2;\\n\\",
+                "  }\\n</style>\\n<table class=\\\"table\\\" style=\\\"width:100%\\\" id=\\\"nf-core-versions\\\"\\",
+                "  >\\n    <thead>\\n        <tr>\\n            <th> Process Name </th>\\n            <th>\\",
+                "  \\ Software </th>\\n            <th> Version  </th>\\n        </tr>\\n    </thead>\\n\\",
+                "  \\n<tbody>\\n<tr>\\n    <td><samp>CUSTOM_DUMPSOFTWAREVERSIONS</samp></td>\\n    <td><samp>python</samp></td>\\n\\",
+                "  \\    <td><samp>3.12.0</samp></td>\\n</tr>\\n\\n<tr>\\n    <td><samp></samp></td>\\n \\",
+                "  \\   <td><samp>yaml</samp></td>\\n    <td><samp>6.0.1</samp></td>\\n</tr>\\n\\n</tbody>\\n\\",
+                "  <tbody>\\n<tr>\\n    <td><samp>TOOL1</samp></td>\\n    <td><samp>tool1</samp></td>\\n\\",
+                "  \\    <td><samp>0.11.9</samp></td>\\n</tr>\\n\\n</tbody>\\n<tbody>\\n<tr>\\n    <td><samp>TOOL2</samp></td>\\n\\",
+                "  \\    <td><samp>tool2</samp></td>\\n    <td><samp>1.9</samp></td>\\n</tr>\\n\\n</tbody>\\n\\",
+                "  <tbody>\\n<tr>\\n    <td><samp>Workflow</samp></td>\\n    <td><samp>Nextflow</samp></td>\\n\\"
+            ],
+            [
+                "CUSTOM_DUMPSOFTWAREVERSIONS:",
+                "  python: 3.12.0",
+                "  yaml: 6.0.1",
+                "TOOL1:",
+                "  tool1: 0.11.9",
+                "TOOL2:",
+                "  tool2: '1.9'",
+                "Workflow:"
+            ]
         ],
-        "timestamp": "2023-11-03T14:43:22.157011"
+        "timestamp": "2024-01-05T00:18:43.461970077"
     }
-}
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test
index b9e8f92..ad9bc54 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test
+++ b/modules/nf-core/fastqc/tests/main.nf.test
@@ -3,23 +3,21 @@ nextflow_process {
     name "Test Process FASTQC"
     script "../main.nf"
     process "FASTQC"
+
     tag "modules"
     tag "modules_nfcore"
     tag "fastqc"
 
-    test("Single-Read") {
+    test("sarscov2 single-end [fastq]") {
 
         when {
-            params {
-                outdir   = "$outputDir"
-            }
             process {
                 """
                 input[0] = [
-                    [ id: 'test', single_end:true ],
-                    [
-                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
-                    ]
+                            [ id: 'test', single_end:true ],
+                            [
+                                file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+                            ]
                 ]
                 """
             }
@@ -28,82 +26,195 @@ nextflow_process {
         then {
             assertAll (
             { assert process.success },
+
             // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it.
             // looks like this: <div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
             // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039
-            { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" },
-            { assert path(process.out.html.get(0).get(1)).getText().contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
-            { assert snapshot(process.out.versions).match("versions") },
-            { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" }
+
+            { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+            { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+            { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+
+            { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("sarscov2 paired-end [fastq]") {
+
+        when {
+            process {
+            """
+            input[0] = [
+                        [id: 'test', single_end: false], // meta map
+                        [
+                            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+                        ]
+            ]
+            """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+
+            { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
+            { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
+            { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
+            { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
+            { assert path(process.out.html[0][1][0]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+            { assert path(process.out.html[0][1][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+
+            { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("sarscov2 interleaved [fastq]") {
+
+        when {
+            process {
+            """
+            input[0] = [
+                        [id: 'test', single_end: false], // meta map
+                        file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true)
+            ]
+            """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+
+            { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+            { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+            { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+
+            { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("sarscov2 paired-end [bam]") {
+
+        when {
+            process {
+            """
+            input[0] = [
+                        [id: 'test', single_end: false], // meta map
+                        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+            ]
+            """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+
+            { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+            { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+            { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+
+            { assert snapshot(process.out.versions).match("versions") }
             )
         }
     }
-// TODO
-// //
-// // Test with paired-end data
-// //
-// workflow test_fastqc_paired_end {
-//     input = [
-//                 [id: 'test', single_end: false], // meta map
-//                 [
-//                     file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-//                     file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
-//                 ]
-//             ]
-
-//     FASTQC ( input )
-// }
-
-// //
-// // Test with interleaved data
-// //
-// workflow test_fastqc_interleaved {
-//     input = [
-//                 [id: 'test', single_end: false], // meta map
-//                 file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true)
-//             ]
-
-//     FASTQC ( input )
-// }
-
-// //
-// // Test with bam data
-// //
-// workflow test_fastqc_bam {
-//     input = [
-//                 [id: 'test', single_end: false], // meta map
-//                 file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
-//             ]
-
-//     FASTQC ( input )
-// }
-
-// //
-// // Test with multiple samples
-// //
-// workflow test_fastqc_multiple {
-//     input = [
-//                 [id: 'test', single_end: false], // meta map
-//                 [
-//                     file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-//                     file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
-//                     file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
-//                     file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true)
-//                 ]
-//             ]
-
-//     FASTQC ( input )
-// }
-
-// //
-// // Test with custom prefix
-// //
-// workflow test_fastqc_custom_prefix {
-//     input = [
-//                 [ id:'mysample', single_end:true ], // meta map
-//                 file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
-//             ]
-
-//     FASTQC ( input )
-// }
+
+    test("sarscov2 multiple [fastq]") {
+
+        when {
+            process {
+            """
+            input[0] = [
+                        [id: 'test', single_end: false], // meta map
+                        [
+                            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true)
+                        ]
+            ]
+            """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+
+            { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
+            { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
+            { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" },
+            { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" },
+            { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
+            { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
+            { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" },
+            { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" },
+            { assert path(process.out.html[0][1][0]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+            { assert path(process.out.html[0][1][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+            { assert path(process.out.html[0][1][2]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+            { assert path(process.out.html[0][1][3]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+
+            { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("sarscov2 custom_prefix") {
+
+        when {
+            process {
+            """
+            input[0] = [
+                        [ id:'mysample', single_end:true ], // meta map
+                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+            ]
+            """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+
+            { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" },
+            { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" },
+            { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+
+            { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("sarscov2 single-end [fastq] - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                            [ id: 'test', single_end:true ],
+                            [
+                                file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+                            ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+            { assert snapshot(process.out.html.collect { file(it[1]).getName() } +
+                                process.out.zip.collect { file(it[1]).getName() } +
+                                process.out.versions ).match() }
+            )
+        }
+    }
+
 }
diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap
index 636a32c..5ef5afb 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test.snap
+++ b/modules/nf-core/fastqc/tests/main.nf.test.snap
@@ -1,10 +1,20 @@
 {
+    "sarscov2 single-end [fastq] - stub": {
+        "content": [
+            [
+                "test.html",
+                "test.zip",
+                "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+            ]
+        ],
+        "timestamp": "2023-12-29T02:48:05.126117287"
+    },
     "versions": {
         "content": [
             [
                 "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
             ]
         ],
-        "timestamp": "2023-10-09T23:40:54+0000"
+        "timestamp": "2023-12-29T02:46:49.507942667"
     }
 }
\ No newline at end of file

From fce65a05dc9b7ff009ddbceed4937c7818783b4e Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Mon, 8 Jan 2024 11:43:54 +1300
Subject: [PATCH 45/59] Removed -exclude_partial and updated flowchart

---
 README.md           | 7 ++++++-
 TODO.md             | 2 --
 conf/modules.config | 1 -
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 8efbcf0..e5251f2 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@ flowchart TD
     TARGET_ASSEMBLIES
     TE_LIBRARIES
     FASTA_VALIDATE
+    fasta_file_from_fasta_validate
     EDTA
     REPEATMASKER
     end
@@ -17,7 +18,8 @@ flowchart TD
     TARGET_ASSEMBLIES(["[target_assemblies]"])
     TE_LIBRARIES(["[te_libs]"])
     TARGET_ASSEMBLIES --> FASTA_VALIDATE
-    FASTA_VALIDATE --> EDTA
+    FASTA_VALIDATE --> |Fasta|fasta_file_from_fasta_validate(( ))
+    fasta_file_from_fasta_validate --> EDTA
     TE_LIBRARIES --> REPEATMASKER
     EDTA --> |te_lib absent|REPEATMASKER
 
@@ -28,6 +30,7 @@ flowchart TD
     FASTP
     FASTP_FASTQC
     SORTMERNA
+    fasta_file_for_star
     STAR
     SAMTOOLS_CAT
     end
@@ -39,6 +42,8 @@ flowchart TD
     FASTQC --> FASTP
     FASTP --> FASTP_FASTQC[FASTQC]
     FASTP_FASTQC --> SORTMERNA
+    fasta_file_for_star(( ))
+    fasta_file_for_star --> |Fasta|STAR
     SORTMERNA --> STAR
     STAR --> SAMTOOLS_CAT
 
diff --git a/TODO.md b/TODO.md
index 2366b0a..1d4fed8 100644
--- a/TODO.md
+++ b/TODO.md
@@ -12,6 +12,4 @@
 
 > https://www.biorxiv.org/content/10.1101/096529v2.full.pdf
 
-> Don't use `-exclude_partial`
-
 - [ ] Sort out EDTA testing
diff --git a/conf/modules.config b/conf/modules.config
index 392583a..474a129 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -135,7 +135,6 @@ if(params.liftoff_xref_annotations) {
         withName: LIFTOFF {
             ext.args = '    '
             ext.args = [
-                '-exclude_partial',
                 '-copies',
                 '-polish',
                 "-a $params.liftoff_coverage",

From 582f3faeeccf10105790f80836bf66e48ae06acd Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Mon, 8 Jan 2024 15:24:04 +1300
Subject: [PATCH 46/59] Separated test config for local and pfr

---
 TODO.md                               |  6 ++++++
 conf/local_stub_params.json           | 29 +++++++++++++++++++++++++++
 nextflow.config                       | 20 +++++++++---------
 pangene_local_stub.sh                 |  4 +---
 subworkflows/local/extract_samples.nf |  2 --
 5 files changed, 46 insertions(+), 15 deletions(-)
 create mode 100644 conf/local_stub_params.json

diff --git a/TODO.md b/TODO.md
index 1d4fed8..3742442 100644
--- a/TODO.md
+++ b/TODO.md
@@ -13,3 +13,9 @@
 > https://www.biorxiv.org/content/10.1101/096529v2.full.pdf
 
 - [ ] Sort out EDTA testing
+
+- Mib finder, eggnog, blastp against TAIR and uniprot (Wait)
+- entap to merge (Wait)
+- trinity and PASA + StringTie2 -> Evigene (Do)
+- orthofinder paper
+- gffcompare on braker and liftoff
diff --git a/conf/local_stub_params.json b/conf/local_stub_params.json
new file mode 100644
index 0000000..df155d5
--- /dev/null
+++ b/conf/local_stub_params.json
@@ -0,0 +1,29 @@
+{
+  "target_assemblies": [
+    ["red5_v2p1", ".test/red5_v2p1_chr1.fasta"],
+    ["donghong", ".test/donghong.chr1.fsa.gz"]
+  ],
+
+  "te_libraries": [["donghong", ".test/donghong.TElib.fa.gz"]],
+
+  "samplesheet": "./.test/samplesheet_small.csv",
+
+  "external_protein_fastas": [
+    ".test/ext_prots/Viridiplantae.fa.gz",
+    ".test/ext_prots/RU01.20221115150135.pep.fasta"
+  ],
+
+  "liftoff_xref_annotations": [
+    [
+      ".test/liftoff/Russell_V2a.chromosomes.and.unassiged.and.haplotigs.fsa",
+      ".test/liftoff/RU01.20221115150135.gff3"
+    ],
+    [
+      ".test/liftoff/TAIR10_chr_all.fas",
+      ".test/liftoff/TAIR10_GFF3_genes_transposons.fixed.gff3"
+    ]
+  ],
+
+  "max_cpus": 1,
+  "max_memory": "1.GB"
+}
diff --git a/nextflow.config b/nextflow.config
index d2a0651..e0d658b 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -2,8 +2,8 @@ includeConfig './conf/base.config'
 
 params {
     target_assemblies           = [
-        ["red5_v2p1", ".test/red5_v2p1_chr1.fasta"],
-        ["donghong", ".test/donghong.chr1.fsa.gz"]
+        ["red5_v2p1", "/workspace/pangene/test_data/red5_v2p1_chr1.fasta"],
+        ["donghong", "/workspace/pangene/test_data/donghong.chr1.fsa.gz"]
     ]
     // Pattern:             [ [tag, fasta(.gz) ] ]
     // Permissible tags:    tag, tag_1, tag_tag2_3, tag_tag2_tag3;
@@ -11,7 +11,7 @@ params {
     //                      "." is not allowed in the tag name
     
     te_libraries                = [
-        ["donghong", ".test/donghong.TElib.fa.gz"]
+        ["donghong", "/workspace/pangene/test_data/donghong.TElib.fa.gz"]
     ]
     // Pattern:             [ [tag, fasta(.gz) ] ]
     // Optional             Set to null if libraries are not available.
@@ -25,7 +25,7 @@ params {
     
     repeatmasker_save_outputs   = true
     
-    samplesheet                 = "./.test/samplesheet.csv"
+    samplesheet                 = "/workspace/pangene/test_data/samplesheet.csv"
     // Optional: Set to null if not available
 
     skip_fastqc                 = false
@@ -45,8 +45,8 @@ params {
     star_save_outputs           = true
 
     external_protein_fastas     = [
-        ".test/ext_prots/Viridiplantae.fa.gz",
-        ".test/ext_prots/RU01.20221115150135.pep.fasta"
+        "/workspace/ComparativeDataSources/OrthoDB11/Viridiplantae.fa.gz",
+        "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.pep.fasta"
     ]
     // Optional: Set to null if not available
 
@@ -54,12 +54,12 @@ params {
 
     liftoff_xref_annotations    = [
         [
-            ".test/liftoff/Russell_V2a.chromosomes.and.unassiged.and.haplotigs.fsa",
-            ".test/liftoff/RU01.20221115150135.gff3"
+            "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/Russell_V2a.chromosomes.and.unassiged.and.haplotigs.fsa",
+            "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.gff3"
         ],
         [
-            ".test/liftoff/TAIR10_chr_all.fas",
-            ".test/liftoff/TAIR10_GFF3_genes_transposons.fixed.gff3"
+            "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_chr_all.fas",
+            "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_GFF3_genes_transposons.fixed.gff3"
         ]
     ]
     // Format:      [ [ fasta(.gz), gff3(.gz) ] ]
diff --git a/pangene_local_stub.sh b/pangene_local_stub.sh
index 0a18650..7101009 100755
--- a/pangene_local_stub.sh
+++ b/pangene_local_stub.sh
@@ -5,6 +5,4 @@ nextflow \
     -profile local,docker \
     -resume \
     -stub \
-    --max_cpus=1 \
-    --max_memory=1.GB \
-    --samplesheet="./.test/samplesheet_small.csv"
\ No newline at end of file
+    --params-file conf/local_stub_params.json
\ No newline at end of file
diff --git a/subworkflows/local/extract_samples.nf b/subworkflows/local/extract_samples.nf
index 957f218..75437da 100644
--- a/subworkflows/local/extract_samples.nf
+++ b/subworkflows/local/extract_samples.nf
@@ -67,7 +67,5 @@ def create_fastq_channel(LinkedHashMap row, sheetPath) {
         fastq_meta = [ meta, [ file(fq1), file(fq2) ] ]
     }
 
-    println fastq_meta
-
     return fastq_meta
 }
\ No newline at end of file

From e213bd383aa73e6e37b22a452e9f30cb40868398 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Mon, 8 Jan 2024 15:31:07 +1300
Subject: [PATCH 47/59] Fixed local script typo

---
 pangene_local_stub.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pangene_local_stub.sh b/pangene_local_stub.sh
index 7101009..01c93e4 100755
--- a/pangene_local_stub.sh
+++ b/pangene_local_stub.sh
@@ -5,4 +5,4 @@ nextflow \
     -profile local,docker \
     -resume \
     -stub \
-    --params-file conf/local_stub_params.json
\ No newline at end of file
+    -params-file conf/local_stub_params.json
\ No newline at end of file

From c961ab03b30bb77936fae6fa6f2bc839b8648856 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Mon, 8 Jan 2024 17:23:31 +1300
Subject: [PATCH 48/59] Fixed apptainer scope bug in base config

---
 conf/base.config | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 4467c0b..2a6c2fe 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -2,10 +2,10 @@ profiles {
     pfr {
         process {
             executor        = 'slurm'
+        }
 
-            apptainer {
-                envWhitelist= 'APPTAINER_BINDPATH,APPTAINER_BIND'
-            }
+        apptainer {
+            envWhitelist    = 'APPTAINER_BINDPATH,APPTAINER_BIND'
         }
     }
     

From 260c7068162bfc1bd5b4b4f0d6c6586d022074ed Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Mon, 8 Jan 2024 17:47:13 +1300
Subject: [PATCH 49/59] Updated README

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e5251f2..8f277ea 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,7 @@ flowchart TD
 Configure the pipeline by modifying `nextflow.config` and submit to SLURM for execution.
 
 ```bash
-sbatch ./pan_gene_pfr.sh
+sbatch ./pangene_pfr.sh
 ```
 
 ## Third-party Sources

From 565e7d76c6b99eee0efadc53ba631c99f05c8d99 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 9 Jan 2024 11:39:32 +1300
Subject: [PATCH 50/59] Re-added -exclude_partial and now using teambraker
 container

---
 conf/modules.config                | 1 +
 modules/kherronism/braker3/main.nf | 5 +----
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 474a129..392583a 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -135,6 +135,7 @@ if(params.liftoff_xref_annotations) {
         withName: LIFTOFF {
             ext.args = '    '
             ext.args = [
+                '-exclude_partial',
                 '-copies',
                 '-polish',
                 "-a $params.liftoff_coverage",
diff --git a/modules/kherronism/braker3/main.nf b/modules/kherronism/braker3/main.nf
index 14fc08c..31c1b33 100644
--- a/modules/kherronism/braker3/main.nf
+++ b/modules/kherronism/braker3/main.nf
@@ -2,10 +2,7 @@ process BRAKER3 {
     tag "${meta.id}"
     label 'process_high'
 
-    conda "bioconda::braker3=3.0.6"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/braker3%3A3.0.6--hdfd78af_0':
-        'biocontainers/braker3:3.0.6--hdfd78af_0' }"
+    container "docker://teambraker/braker3:v1.0.6"
 
     input:
     tuple val(meta), path(fasta)

From 47b7c40a23a626bf071432b057899995df3ef723 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Tue, 9 Jan 2024 21:06:21 +1300
Subject: [PATCH 51/59] Added config for test data and quay.io container for
 braker3

---
 README.md                          |  2 +-
 assets/rrna-db-test.txt            |  1 +
 conf/local_stub_params.json        | 29 ----------------------------
 conf/test_params.json              | 31 ++++++++++++++++++++++++++++++
 modules/kherronism/braker3/main.nf |  2 +-
 pangene_local                      | 16 +++++++++++++++
 pangene_local_stub.sh              |  8 --------
 pangene_pfr.sh => pangene_pfr      |  0
 8 files changed, 50 insertions(+), 39 deletions(-)
 create mode 100644 assets/rrna-db-test.txt
 delete mode 100644 conf/local_stub_params.json
 create mode 100644 conf/test_params.json
 create mode 100755 pangene_local
 delete mode 100755 pangene_local_stub.sh
 rename pangene_pfr.sh => pangene_pfr (100%)

diff --git a/README.md b/README.md
index 8f277ea..b1d7966 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,7 @@ flowchart TD
 Configure the pipeline by modifying `nextflow.config` and submit to SLURM for execution.
 
 ```bash
-sbatch ./pangene_pfr.sh
+sbatch ./pangene_pfr
 ```
 
 ## Third-party Sources
diff --git a/assets/rrna-db-test.txt b/assets/rrna-db-test.txt
new file mode 100644
index 0000000..16504bb
--- /dev/null
+++ b/assets/rrna-db-test.txt
@@ -0,0 +1 @@
+https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-28s-id98.fasta
\ No newline at end of file
diff --git a/conf/local_stub_params.json b/conf/local_stub_params.json
deleted file mode 100644
index df155d5..0000000
--- a/conf/local_stub_params.json
+++ /dev/null
@@ -1,29 +0,0 @@
-{
-  "target_assemblies": [
-    ["red5_v2p1", ".test/red5_v2p1_chr1.fasta"],
-    ["donghong", ".test/donghong.chr1.fsa.gz"]
-  ],
-
-  "te_libraries": [["donghong", ".test/donghong.TElib.fa.gz"]],
-
-  "samplesheet": "./.test/samplesheet_small.csv",
-
-  "external_protein_fastas": [
-    ".test/ext_prots/Viridiplantae.fa.gz",
-    ".test/ext_prots/RU01.20221115150135.pep.fasta"
-  ],
-
-  "liftoff_xref_annotations": [
-    [
-      ".test/liftoff/Russell_V2a.chromosomes.and.unassiged.and.haplotigs.fsa",
-      ".test/liftoff/RU01.20221115150135.gff3"
-    ],
-    [
-      ".test/liftoff/TAIR10_chr_all.fas",
-      ".test/liftoff/TAIR10_GFF3_genes_transposons.fixed.gff3"
-    ]
-  ],
-
-  "max_cpus": 1,
-  "max_memory": "1.GB"
-}
diff --git a/conf/test_params.json b/conf/test_params.json
new file mode 100644
index 0000000..30c9c9c
--- /dev/null
+++ b/conf/test_params.json
@@ -0,0 +1,31 @@
+{
+  "target_assemblies": [
+    ["red5_v2p1", ".test/target/red5_v2p1_chr1_600k.fasta.gz"],
+    ["donghong", ".test/target/donghong_chr1_600k.fsa.gz"]
+  ],
+
+  "te_libraries": [["donghong", ".test/te_lib/donghong.TElib.fa.gz"]],
+
+  "samplesheet": ".test/samplesheet/samplesheet.csv",
+
+  "ribo_database_manifest":"assets/rrna-db-test.txt",
+
+  "external_protein_fastas": [
+    ".test/ext_prot/RU01_20221115150135_chr1_600k.pep.fasta.gz",
+    ".test/ext_prot/RU01_20221115150135_chr2_600k.pep.fasta.gz"
+  ],
+
+  "liftoff_xref_annotations": [
+    [
+      ".test/liftoff/Russell_V2a_chr1_600k.fsa.gz",
+      ".test/liftoff/Russell_V2a_chr1_600k.gff3.gz"
+    ],
+    [
+      ".test/liftoff/TAIR10_chr1_600k.fas.gz",
+      ".test/liftoff/TAIR10_chr1_600k.gff3.gz"
+    ]
+  ],
+
+  "max_cpus": 2,
+  "max_memory": "3.GB"
+}
diff --git a/modules/kherronism/braker3/main.nf b/modules/kherronism/braker3/main.nf
index 31c1b33..aab3eae 100644
--- a/modules/kherronism/braker3/main.nf
+++ b/modules/kherronism/braker3/main.nf
@@ -2,7 +2,7 @@ process BRAKER3 {
     tag "${meta.id}"
     label 'process_high'
 
-    container "docker://teambraker/braker3:v1.0.6"
+    container "gallvp/teambraker_braker3:v1.0.6"
 
     input:
     tuple val(meta), path(fasta)
diff --git a/pangene_local b/pangene_local
new file mode 100755
index 0000000..8e8e692
--- /dev/null
+++ b/pangene_local
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+[[ $1 == '-stub' ]] \
+    && stub='-stub' \
+    || stub=''
+
+[[ $1 == '-stub' ]] \
+    && echo 'Executing with -stub' \
+    || echo 'Executing without -stub'
+
+nextflow \
+    main.nf \
+    -profile local,docker \
+    -resume \
+    $stub \
+    -params-file conf/test_params.json
\ No newline at end of file
diff --git a/pangene_local_stub.sh b/pangene_local_stub.sh
deleted file mode 100755
index 01c93e4..0000000
--- a/pangene_local_stub.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/env bash
-
-nextflow \
-    main.nf \
-    -profile local,docker \
-    -resume \
-    -stub \
-    -params-file conf/local_stub_params.json
\ No newline at end of file
diff --git a/pangene_pfr.sh b/pangene_pfr
similarity index 100%
rename from pangene_pfr.sh
rename to pangene_pfr

From 664fba178e10e6d078964651f4b904851c4eafb3 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 10 Jan 2024 11:06:52 +1300
Subject: [PATCH 52/59] Now using repeatmodeler by default

---
 README.md                                     | 11 ++-
 conf/modules.config                           | 13 ++-
 modules.json                                  | 10 ++
 modules/local/validate_params.nf              |  8 ++
 .../builddatabase/environment.yml             |  9 ++
 .../pfr/repeatmodeler/builddatabase/main.nf   | 50 ++++++++++
 .../pfr/repeatmodeler/builddatabase/meta.yml  | 44 +++++++++
 .../builddatabase/tests/main.nf.test          | 60 ++++++++++++
 .../builddatabase/tests/main.nf.test.snap     | 16 ++++
 .../builddatabase/tests/tags.yml              |  2 +
 .../repeatmodeler/environment.yml             |  9 ++
 .../pfr/repeatmodeler/repeatmodeler/main.nf   | 54 +++++++++++
 .../pfr/repeatmodeler/repeatmodeler/meta.yml  | 52 +++++++++++
 .../repeatmodeler/tests/main.nf.test          | 92 +++++++++++++++++++
 .../repeatmodeler/tests/main.nf.test.snap     | 46 ++++++++++
 .../repeatmodeler/tests/tags.yml              |  2 +
 nextflow.config                               |  6 +-
 subworkflows/local/prepare_assembly.nf        | 29 +++++-
 workflows/pangene.nf                          |  3 +-
 19 files changed, 507 insertions(+), 9 deletions(-)
 create mode 100644 modules/pfr/repeatmodeler/builddatabase/environment.yml
 create mode 100644 modules/pfr/repeatmodeler/builddatabase/main.nf
 create mode 100644 modules/pfr/repeatmodeler/builddatabase/meta.yml
 create mode 100644 modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test
 create mode 100644 modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap
 create mode 100644 modules/pfr/repeatmodeler/builddatabase/tests/tags.yml
 create mode 100644 modules/pfr/repeatmodeler/repeatmodeler/environment.yml
 create mode 100644 modules/pfr/repeatmodeler/repeatmodeler/main.nf
 create mode 100644 modules/pfr/repeatmodeler/repeatmodeler/meta.yml
 create mode 100644 modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test
 create mode 100644 modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap
 create mode 100644 modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml

diff --git a/README.md b/README.md
index b1d7966..5efb30f 100644
--- a/README.md
+++ b/README.md
@@ -12,16 +12,21 @@ flowchart TD
     FASTA_VALIDATE
     fasta_file_from_fasta_validate
     EDTA
+    REPEATMODELER
+    te_lib_absent_node
     REPEATMASKER
     end
 
     TARGET_ASSEMBLIES(["[target_assemblies]"])
     TE_LIBRARIES(["[te_libs]"])
     TARGET_ASSEMBLIES --> FASTA_VALIDATE
-    FASTA_VALIDATE --> |Fasta|fasta_file_from_fasta_validate(( ))
-    fasta_file_from_fasta_validate --> EDTA
+    FASTA_VALIDATE --- |Fasta|fasta_file_from_fasta_validate(( ))
+    fasta_file_from_fasta_validate --> |or|EDTA
+    fasta_file_from_fasta_validate --> |default|REPEATMODELER
+    REPEATMODELER --- te_lib_absent_node(( ))
+    EDTA --- te_lib_absent_node
     TE_LIBRARIES --> REPEATMASKER
-    EDTA --> |te_lib absent|REPEATMASKER
+    te_lib_absent_node --> REPEATMASKER
 
     subgraph Samplesheet [ ]
     SAMPLESHEET
diff --git a/conf/modules.config b/conf/modules.config
index 392583a..d2149ce 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -10,7 +10,18 @@ process {
             path: { "${params.outdir}/edta" },
             mode: "copy",
             pattern: '*.EDTA.TElib.fa',
-            enabled: params.edta_save_te_lib
+            enabled: params.save_annotated_te_lib
+        ]
+    }
+
+    withName: 'REPEATMODELER_REPEATMODELER' {
+        ext.args = '-LTRStruct'
+
+        publishDir = [
+            path: { "${params.outdir}/repeatmodeler" },
+            mode: "copy",
+            pattern: '*.fa',
+            enabled: params.save_annotated_te_lib
         ]
     }
 
diff --git a/modules.json b/modules.json
index cdb8a24..a645b68 100644
--- a/modules.json
+++ b/modules.json
@@ -29,6 +29,16 @@
             "branch": "main",
             "git_sha": "444b35f4e6285115f84d2bfce49fc0e6d8a2754e",
             "installed_by": ["modules"]
+          },
+          "repeatmodeler/builddatabase": {
+            "branch": "main",
+            "git_sha": "9da0567f685b2772f65290f2bd6d6347671c8310",
+            "installed_by": ["modules"]
+          },
+          "repeatmodeler/repeatmodeler": {
+            "branch": "main",
+            "git_sha": "9da0567f685b2772f65290f2bd6d6347671c8310",
+            "installed_by": ["modules"]
           }
         }
       },
diff --git a/modules/local/validate_params.nf b/modules/local/validate_params.nf
index 5eb6207..f6ce18a 100644
--- a/modules/local/validate_params.nf
+++ b/modules/local/validate_params.nf
@@ -1,6 +1,14 @@
 def validateParams(params) {
     validateFastaTags(params)
     
+    if (!params['repeat_annotator']) {
+        error "Error: repeat_annotator must be either 'repeatmodeler' or 'edta'"
+    }
+
+    if ( !(params['repeat_annotator'] in ['repeatmodeler', 'edta']) ) {
+        error "Error: repeat_annotator must be either 'repeatmodeler' or 'edta'"
+    }
+    
     validateTETags(params)
     validateTEFastaCorrespondence(params)
 
diff --git a/modules/pfr/repeatmodeler/builddatabase/environment.yml b/modules/pfr/repeatmodeler/builddatabase/environment.yml
new file mode 100644
index 0000000..ecc282e
--- /dev/null
+++ b/modules/pfr/repeatmodeler/builddatabase/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "repeatmodeler_builddatabase"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::repeatmodeler=2.0.5"
diff --git a/modules/pfr/repeatmodeler/builddatabase/main.nf b/modules/pfr/repeatmodeler/builddatabase/main.nf
new file mode 100644
index 0000000..486e25d
--- /dev/null
+++ b/modules/pfr/repeatmodeler/builddatabase/main.nf
@@ -0,0 +1,50 @@
+process REPEATMODELER_BUILDDATABASE {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/repeatmodeler:2.0.5--pl5321hdfd78af_0':
+        'biocontainers/repeatmodeler:2.0.5--pl5321hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path("${prefix}.*")    , emit: db
+    path "versions.yml"                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    BuildDatabase \\
+        -name $prefix \\
+        $fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||')
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.nhr
+    touch ${prefix}.nin
+    touch ${prefix}.njs
+    touch ${prefix}.nnd
+    touch ${prefix}.nni
+    touch ${prefix}.nog
+    touch ${prefix}.nsq
+    touch ${prefix}.translation
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||')
+    END_VERSIONS
+    """
+}
diff --git a/modules/pfr/repeatmodeler/builddatabase/meta.yml b/modules/pfr/repeatmodeler/builddatabase/meta.yml
new file mode 100644
index 0000000..d3aa931
--- /dev/null
+++ b/modules/pfr/repeatmodeler/builddatabase/meta.yml
@@ -0,0 +1,44 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "repeatmodeler_builddatabase"
+description: Create a database for RepeatModeler
+keywords:
+  - genomics
+  - fasta
+  - repeat
+tools:
+  - "repeatmodeler":
+      description: "RepeatModeler is a de-novo repeat family identification and modeling package."
+      homepage: "https://github.com/Dfam-consortium/RepeatModeler"
+      documentation: "https://github.com/Dfam-consortium/RepeatModeler"
+      tool_dev_url: "https://github.com/Dfam-consortium/RepeatModeler"
+      licence: ["Open Software License v2.1"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - fasta:
+      type: file
+      description: Fasta file
+      pattern: "*.{fasta,fsa,fa}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - db:
+      type: file
+      description: Database files for repeatmodeler
+      pattern: "`${prefix}.*`"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@GallVp"
+maintainers:
+  - "@GallVp"
diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test
new file mode 100644
index 0000000..616f88c
--- /dev/null
+++ b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test
@@ -0,0 +1,60 @@
+nextflow_process {
+
+    name "Test Process REPEATMODELER_BUILDDATABASE"
+    script "../main.nf"
+    process "REPEATMODELER_BUILDDATABASE"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "repeatmodeler"
+    tag "repeatmodeler/builddatabase"
+
+    test("sarscov2-genome_fasta") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match("versions") },
+                { assert snapshot(process.out.db[0][1].collect { file(it).name }.sort().toString()).match("for-stub-match") }
+            )
+        }
+
+    }
+
+    test("sarscov2-genome_fasta-stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match("versions") },
+                { assert snapshot(process.out.db[0][1].collect { file(it).name }.sort().toString()).match("for-stub-match") }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap
new file mode 100644
index 0000000..cda327e
--- /dev/null
+++ b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap
@@ -0,0 +1,16 @@
+{
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,7944637266bc3e2726899eaad5e46c87"
+            ]
+        ],
+        "timestamp": "2024-01-09T15:14:48.807063"
+    },
+    "for-stub-match": {
+        "content": [
+            "[test.nhr, test.nin, test.njs, test.nnd, test.nni, test.nog, test.nsq, test.translation]"
+        ],
+        "timestamp": "2024-01-09T15:14:48.81702"
+    }
+}
\ No newline at end of file
diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/tags.yml b/modules/pfr/repeatmodeler/builddatabase/tests/tags.yml
new file mode 100644
index 0000000..426540d
--- /dev/null
+++ b/modules/pfr/repeatmodeler/builddatabase/tests/tags.yml
@@ -0,0 +1,2 @@
+repeatmodeler/builddatabase:
+  - "modules/pfr/repeatmodeler/builddatabase/**"
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/environment.yml b/modules/pfr/repeatmodeler/repeatmodeler/environment.yml
new file mode 100644
index 0000000..2422071
--- /dev/null
+++ b/modules/pfr/repeatmodeler/repeatmodeler/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "repeatmodeler_repeatmodeler"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::repeatmodeler=2.0.5"
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/main.nf b/modules/pfr/repeatmodeler/repeatmodeler/main.nf
new file mode 100644
index 0000000..34df322
--- /dev/null
+++ b/modules/pfr/repeatmodeler/repeatmodeler/main.nf
@@ -0,0 +1,54 @@
+process REPEATMODELER_REPEATMODELER {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/repeatmodeler:2.0.5--pl5321hdfd78af_0':
+        'biocontainers/repeatmodeler:2.0.5--pl5321hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(db)
+
+    output:
+    tuple val(meta), path("*.fa")   , emit: fasta
+    tuple val(meta), path("*.stk")  , emit: stk
+    tuple val(meta), path("*.log")  , emit: log
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args    = task.ext.args ?: ''
+    def prefix  = task.ext.prefix ?: "${meta.id}"
+    def db_name = file(db[0]).getBaseName()
+    """
+    RepeatModeler \\
+        -database $db_name \\
+        $args \\
+        -threads $task.cpus
+
+    mv ${db_name}-families.fa   ${prefix}.fa
+    mv ${db_name}-families.stk  ${prefix}.stk
+    mv ${db_name}-rmod.log      ${prefix}.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix  = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.fa
+    touch ${prefix}.stk
+    touch ${prefix}.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||')
+    END_VERSIONS
+    """
+}
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/meta.yml b/modules/pfr/repeatmodeler/repeatmodeler/meta.yml
new file mode 100644
index 0000000..29bb795
--- /dev/null
+++ b/modules/pfr/repeatmodeler/repeatmodeler/meta.yml
@@ -0,0 +1,52 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "repeatmodeler_repeatmodeler"
+description: Performs de novo transposable element (TE) family identification with RepeatModeler
+keywords:
+  - genomics
+  - fasta
+  - repeat
+  - transposable element
+tools:
+  - "repeatmodeler":
+      description: "RepeatModeler is a de-novo repeat family identification and modeling package."
+      homepage: "https://github.com/Dfam-consortium/RepeatModeler"
+      documentation: "https://github.com/Dfam-consortium/RepeatModeler"
+      tool_dev_url: "https://github.com/Dfam-consortium/RepeatModeler"
+      licence: ["Open Software License v2.1"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - db:
+      type: file
+      description: RepeatModeler database files generated with REPEATMODELER_BUILDDATABASE
+      pattern: "*"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - fasta:
+      type: file
+      description: Consensus repeat sequences
+      pattern: "*.fa"
+  - stk:
+      type: file
+      description: Seed alignments
+      pattern: "*.stk"
+  - log:
+      type: file
+      description: A summarized log of the run
+      pattern: "*.log"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@GallVp"
+maintainers:
+  - "@GallVp"
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test
new file mode 100644
index 0000000..78b7957
--- /dev/null
+++ b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test
@@ -0,0 +1,92 @@
+nextflow_process {
+
+    name "Test Process REPEATMODELER_REPEATMODELER"
+    script "../main.nf"
+    process "REPEATMODELER_REPEATMODELER"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "repeatmodeler"
+    tag "repeatmodeler/repeatmodeler"
+    tag "repeatmodeler/builddatabase"
+
+    test("homo_sapiens-genome_fasta") {
+
+        setup {
+            run("REPEATMODELER_BUILDDATABASE") {
+                script "../../../../pfr/repeatmodeler/builddatabase"
+
+                process {
+                    """
+                    input[0] = [
+                        [ id:'test' ], // meta map
+                        file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+                    ]
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = REPEATMODELER_BUILDDATABASE.out.db
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.fasta).match("fasta") },
+                { assert snapshot(process.out.stk).match("stk") },
+                { assert file(process.out.log[0][1]).text.contains('1 families discovered.') },
+                { assert snapshot(process.out.versions).match("versions") },
+                {
+                    assert snapshot(
+                        (
+                            process.out.fasta.collect { file(it[1]).getName() } +
+                            process.out.stk.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("for-stub-match")
+                }
+            )
+        }
+
+    }
+
+    test("homo_sapiens-genome_fasta-stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match("versions") },
+                {
+                    assert snapshot(
+                        (
+                            process.out.fasta.collect { file(it[1]).getName() } +
+                            process.out.stk.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("for-stub-match")
+                }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap
new file mode 100644
index 0000000..051dd60
--- /dev/null
+++ b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap
@@ -0,0 +1,46 @@
+{
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,1bb6846ecf1304c262eaef4d3de60cf9"
+            ]
+        ],
+        "timestamp": "2024-01-09T15:06:55.753492"
+    },
+    "stk": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.stk:md5,acd01ad35763c11315e2297a4f051d57"
+                ]
+            ]
+        ],
+        "timestamp": "2024-01-09T15:06:55.740963"
+    },
+    "for-stub-match": {
+        "content": [
+            [
+                "test.fa",
+                "test.log",
+                "test.stk"
+            ]
+        ],
+        "timestamp": "2024-01-09T15:06:55.759971"
+    },
+    "fasta": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.fa:md5,e25326771341204e1f8054d9529411e5"
+                ]
+            ]
+        ],
+        "timestamp": "2024-01-09T15:06:55.737658"
+    }
+}
\ No newline at end of file
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml b/modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml
new file mode 100644
index 0000000..648cc93
--- /dev/null
+++ b/modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml
@@ -0,0 +1,2 @@
+repeatmodeler/repeatmodeler:
+  - "modules/pfr/repeatmodeler/repeatmodeler/**"
diff --git a/nextflow.config b/nextflow.config
index e0d658b..cbc0a2e 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -20,8 +20,12 @@ params {
     // Not all target_assemblies need to have an associated (by tag) TE library.
     // When the TE lib is not available for a traget assembly, EDTA is used to create one.
     
+    repeat_annotator            = 'repeatmodeler'
+    // 'repeatmodeler' or 'edta'
+    
+    save_annotated_te_lib       = true
+    
     edta_is_sensitive           = false
-    edta_save_te_lib            = true
     
     repeatmasker_save_outputs   = true
     
diff --git a/subworkflows/local/prepare_assembly.nf b/subworkflows/local/prepare_assembly.nf
index ed32afb..9fc6244 100644
--- a/subworkflows/local/prepare_assembly.nf
+++ b/subworkflows/local/prepare_assembly.nf
@@ -1,6 +1,8 @@
 include { GUNZIP as GUNZIP_TARGET_ASSEMBLY      } from '../../modules/nf-core/gunzip'
 include { GUNZIP as GUNZIP_TE_LIBRARY           } from '../../modules/nf-core/gunzip'
 include { FASTAVALIDATOR                        } from '../../modules/nf-core/fastavalidator'
+include { REPEATMODELER_BUILDDATABASE           } from '../../modules/pfr/repeatmodeler/builddatabase'
+include { REPEATMODELER_REPEATMODELER           } from '../../modules/pfr/repeatmodeler/repeatmodeler'
 include { REPEATMASKER                          } from '../../modules/kherronism/repeatmasker'
 include { STAR_GENOMEGENERATE                   } from '../../modules/nf-core/star/genomegenerate'
 
@@ -10,6 +12,7 @@ workflow PREPARE_ASSEMBLY {
     take:
     target_assembly             // channel: [ meta, fasta ]
     te_library                  // channel: [ meta, fasta ]
+    repeat_annotator            // val(String), 'repeatmodeler' or 'edta'
 
     main:
     ch_versions                 = Channel.empty()
@@ -59,7 +62,7 @@ workflow PREPARE_ASSEMBLY {
     ch_versions                 = ch_versions.mix(GUNZIP_TE_LIBRARY.out.versions.first())
 
     // SUBWORKFLOW: FASTA_EDTA_LAI
-    ch_edta_inputs              = ch_validated_assembly
+    ch_annotator_inputs         = ch_validated_assembly
                                 | join(
                                     ch_gunzip_te_library, remainder: true
                                 )
@@ -67,19 +70,39 @@ workflow PREPARE_ASSEMBLY {
                                     teLib == null
                                 }
                                 | map { meta, assembly, teLib -> [meta, assembly] }
+
+    ch_edta_inputs              = repeat_annotator != 'edta'
+                                ? Channel.empty()
+                                : ch_annotator_inputs
     
     FASTA_EDTA_LAI(
         ch_edta_inputs,
         [],
         true // Skip LAI
     )
+
+    ch_versions                 = ch_versions.mix(FASTA_EDTA_LAI.out.versions.first())
+
+    // MODULE: REPEATMODELER_BUILDDATABASE
+    ch_repeatmodeler_inputs     = repeat_annotator != 'repeatmodeler'
+                                ? Channel.empty()
+                                : ch_annotator_inputs
+
+    REPEATMODELER_BUILDDATABASE ( ch_repeatmodeler_inputs )
+    
+    ch_versions                 = ch_versions.mix(REPEATMODELER_BUILDDATABASE.out.versions.first())
+
+    // MODULE: REPEATMODELER_REPEATMODELER
+    REPEATMODELER_REPEATMODELER ( REPEATMODELER_BUILDDATABASE.out.db )
     
     ch_assembly_and_te_lib      = ch_validated_assembly
                                 | join(
-                                    FASTA_EDTA_LAI.out.te_lib_fasta.mix(ch_gunzip_te_library)
+                                    repeat_annotator == 'edta'
+                                    ? FASTA_EDTA_LAI.out.te_lib_fasta.mix(ch_gunzip_te_library)
+                                    : REPEATMODELER_REPEATMODELER.out.fasta.mix(ch_gunzip_te_library)
                                 )
 
-    ch_versions                 = ch_versions.mix(FASTA_EDTA_LAI.out.versions.first())
+    ch_versions                 = ch_versions.mix(REPEATMODELER_REPEATMODELER.out.versions.first())
     
     // MODULE: REPEATMASKER
     REPEATMASKER(
diff --git a/workflows/pangene.nf b/workflows/pangene.nf
index 7cc8c4a..72b9fd6 100644
--- a/workflows/pangene.nf
+++ b/workflows/pangene.nf
@@ -66,7 +66,8 @@ workflow PANGENE {
     // SUBWORKFLOW: PREPARE_ASSEMBLY
     PREPARE_ASSEMBLY(
         ch_target_assembly,
-        ch_te_library
+        ch_te_library,
+        params.repeat_annotator
     )
 
     ch_valid_target_assembly    = PREPARE_ASSEMBLY.out.target_assemby

From dea4bb5af54cae9adca1852d472a29e9b0252e54 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 10 Jan 2024 12:54:37 +1300
Subject: [PATCH 53/59] BRAKER3 now runnable with test data

---
 conf/test_params.json              |  4 +++-
 modules/kherronism/braker3/main.nf | 23 ++++++++++++++---------
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/conf/test_params.json b/conf/test_params.json
index 30c9c9c..fda29e2 100644
--- a/conf/test_params.json
+++ b/conf/test_params.json
@@ -1,6 +1,6 @@
 {
   "target_assemblies": [
-    ["red5_v2p1", ".test/target/red5_v2p1_chr1_600k.fasta.gz"],
+    ["red5_v2p1", ".test/target/red5_v2p1_chr1_1200k.fasta.gz"],
     ["donghong", ".test/target/donghong_chr1_600k.fsa.gz"]
   ],
 
@@ -15,6 +15,8 @@
     ".test/ext_prot/RU01_20221115150135_chr2_600k.pep.fasta.gz"
   ],
 
+  "braker_extra_args": "--testMode --species=arabidopsis --useexisting",
+
   "liftoff_xref_annotations": [
     [
       ".test/liftoff/Russell_V2a_chr1_600k.fsa.gz",
diff --git a/modules/kherronism/braker3/main.nf b/modules/kherronism/braker3/main.nf
index aab3eae..ae0ec81 100644
--- a/modules/kherronism/braker3/main.nf
+++ b/modules/kherronism/braker3/main.nf
@@ -26,20 +26,25 @@ process BRAKER3 {
     task.ext.when == null || task.ext.when
 
     script:
-    def args        = task.ext.args         ?: ''
-    prefix          = task.ext.prefix       ?: "${meta.id}"
+    def args        = task.ext.args                                     ?: ''
+    prefix          = task.ext.prefix                                   ?: "${meta.id}"
 
-    def rna_ids     = rnaseq_sets_ids       ? "--rnaseq_sets_ids=${rnaseq_sets_ids}"    : ''
-    def rna_dirs    = rnaseq_sets_dirs      ? "--rnaseq_sets_dirs=${rnaseq_sets_dirs}"  : ''
-    def bam         = bam                   ? "--bam=${bam}"                            : ''
-    def proteins    = proteins              ? "--prot_seq=${proteins}"                  : ''
-    def hints       = hintsfile             ? "--hints=${hintsfile}"                    : ''
+    def test_mode   = args.contains('--testMode') // Custom flag for test data
+    def args_fmt    = test_mode ? args.replace('--testMode', '') : args
+
+    def rna_ids     = rnaseq_sets_ids           ? "--rnaseq_sets_ids=${rnaseq_sets_ids}"    : ''
+    def rna_dirs    = rnaseq_sets_dirs          ? "--rnaseq_sets_dirs=${rnaseq_sets_dirs}"  : ''
+    def bam         = bam && !test_mode         ? "--bam=${bam}"                            : ''
+    def proteins    = proteins && !test_mode    ? "--prot_seq=${proteins}"                  : ''
+    def hints       = hintsfile                 ? "--hints=${hintsfile}"                    : ''
+
+    def new_species = args.contains('--species')   ? '' : "--species new_species"
     """
     cp -r /usr/share/augustus/config augustus_config
 
     braker.pl \\
         --genome ${fasta} \\
-        --species ${prefix} \\
+        ${new_species} \\
         --workingdir ${prefix} \\
         --AUGUSTUS_CONFIG_PATH "\$(pwd)/augustus_config" \\
         --threads ${task.cpus} \\
@@ -48,7 +53,7 @@ process BRAKER3 {
         ${bam} \\
         ${proteins} \\
         ${hints} \\
-        ${args}
+        ${args_fmt}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

From 0535b1a32bc9dd3dba7f96896a3c25e4ef6ffe18 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Wed, 10 Jan 2024 15:28:05 +1300
Subject: [PATCH 54/59] Added editor config

---
 .editorconfig | 15 +++++++++++++++
 .gitignore    |  2 +-
 cleanNXF.sh   |  2 +-
 3 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 .editorconfig

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..2951ad8
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,15 @@
+root = true
+
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+indent_size = 4
+indent_style = space
+
+[*.{md,yml,yaml,cff}]
+indent_size = 2
+
+[*.nf.test]
+insert_final_newline = false
diff --git a/.gitignore b/.gitignore
index 8f984b0..93035ae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,4 @@ testing*
 *.stderr
 
 .literature
-.test
\ No newline at end of file
+.test
diff --git a/cleanNXF.sh b/cleanNXF.sh
index c566dbf..8c64a3e 100755
--- a/cleanNXF.sh
+++ b/cleanNXF.sh
@@ -8,4 +8,4 @@ for i in $(ls work | grep -v "conda");
 do
     rm -rf "work/$i"
 done
-echo "Cleaned work..."
\ No newline at end of file
+echo "Cleaned work..."

From 457a64303cf772998b3888592debf9af3400f512 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 11 Jan 2024 10:31:47 +1300
Subject: [PATCH 55/59] Disabled sortmerna by default; added option to save cat
 bam

---
 conf/modules.config   | 13 +++++++++++--
 conf/test_params.json |  1 +
 nextflow.config       | 19 +++++++++++--------
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index d2149ce..fc489bf 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -30,7 +30,7 @@ process {
             "-no_is",
             "-xsmall",
         ].join(' ').trim()
-        
+
         publishDir = [
             path: { "${params.outdir}/repeatmasker" },
             mode: "copy",
@@ -127,6 +127,15 @@ process {
         ]
     }
 
+    withName: '.*:ALIGN_RNASEQ:SAMTOOLS_CAT' {
+        publishDir = [
+            path: { "${params.outdir}/star/cat_bam" },
+            mode: "copy",
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            enabled: params.save_cat_bam
+        ]
+    }
+
     withName: BRAKER3 {
         ext.args = [
             "--gff3",
@@ -174,4 +183,4 @@ process {
             enabled: true
         ]
     }
-}
\ No newline at end of file
+}
diff --git a/conf/test_params.json b/conf/test_params.json
index fda29e2..fef1871 100644
--- a/conf/test_params.json
+++ b/conf/test_params.json
@@ -8,6 +8,7 @@
 
   "samplesheet": ".test/samplesheet/samplesheet.csv",
 
+  "remove_ribo_rna": true,
   "ribo_database_manifest":"assets/rrna-db-test.txt",
 
   "external_protein_fastas": [
diff --git a/nextflow.config b/nextflow.config
index cbc0a2e..409da80 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -9,7 +9,7 @@ params {
     // Permissible tags:    tag, tag_1, tag_tag2_3, tag_tag2_tag3;
     //                      Any name with alphanumeric characters including "_".
     //                      "." is not allowed in the tag name
-    
+
     te_libraries                = [
         ["donghong", "/workspace/pangene/test_data/donghong.TElib.fa.gz"]
     ]
@@ -19,16 +19,16 @@ params {
     // Each TE library should have an associated (by tag) assembly in target_assemblies.
     // Not all target_assemblies need to have an associated (by tag) TE library.
     // When the TE lib is not available for a traget assembly, EDTA is used to create one.
-    
+
     repeat_annotator            = 'repeatmodeler'
     // 'repeatmodeler' or 'edta'
-    
+
     save_annotated_te_lib       = true
-    
+
     edta_is_sensitive           = false
-    
+
     repeatmasker_save_outputs   = true
-    
+
     samplesheet                 = "/workspace/pangene/test_data/samplesheet.csv"
     // Optional: Set to null if not available
 
@@ -40,13 +40,16 @@ params {
     save_trimmed                = true
     // toggling this parameter results in rerun of FASTP and FASTQC_TRIM
 
-    remove_ribo_rna             = true
+    remove_ribo_rna             = false
     save_non_ribo_reads         = true
     ribo_database_manifest      = "${projectDir}/assets/rrna-db-defaults.txt"
 
     star_max_intron_length      = 16000
     star_align_extra_args       = ""
     star_save_outputs           = true
+    save_cat_bam                = true
+    // A single BAM is created for each assembly from all the RNAseq samples, if there
+    // is more than one
 
     external_protein_fastas     = [
         "/workspace/ComparativeDataSources/OrthoDB11/Viridiplantae.fa.gz",
@@ -81,4 +84,4 @@ params {
 
 includeConfig './conf/manifest.config'
 includeConfig './conf/modules.config'
-includeConfig './conf/reporting_defaults.config'
\ No newline at end of file
+includeConfig './conf/reporting_defaults.config'

From 784bb54b01a36cc89d62e672074389c3517d5a67 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 11 Jan 2024 11:21:45 +1300
Subject: [PATCH 56/59] Added pre-commit

---
 .gitignore                                    |  10 +-
 .pre-commit-config.yaml                       |   5 +
 .prettierignore                               |  19 ++
 .prettierrc.yml                               |   1 +
 assets/rrna-db-defaults.txt                   |   2 +-
 assets/rrna-db-test.txt                       |   2 +-
 bin/make-samplesheet.py                       |   2 +-
 conf/base.config                              |   2 +-
 conf/manifest.config                          |   2 +-
 conf/reporting_defaults.config                |   2 +-
 conf/test_params.json                         |  42 ++-
 main.nf                                       |   2 +-
 modules.json                                  | 296 +++++++++---------
 .../dumpsoftwareversions/environment.yml      |   2 +-
 .../custom/dumpsoftwareversions/main.nf       |   4 +-
 .../tests/main.nf.test.snap                   |  12 +-
 .../umitools/extract/tests/nextflow.config    |   2 +-
 pangene_local                                 |   8 +-
 pangene_pfr                                   |   2 +-
 subworkflows/local/align_rnaseq.nf            |  10 +-
 subworkflows/local/extract_samples.nf         |   6 +-
 subworkflows/local/fasta_liftoff.nf           |  12 +-
 subworkflows/local/prepare_assembly.nf        |  12 +-
 subworkflows/local/prepare_ext_prots.nf       |  12 +-
 subworkflows/local/preprocess_rnaseq.nf       |  12 +-
 workflows/pangene.nf                          |   8 +-
 26 files changed, 257 insertions(+), 232 deletions(-)
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 .prettierignore
 create mode 100644 .prettierrc.yml

diff --git a/.gitignore b/.gitignore
index 93035ae..62d31c0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,14 +1,16 @@
 .nextflow*
 work/
-data/
 results/
 .DS_Store
-testing/
-testing*
+*.code-workspace
+.screenrc
+.*.sw?
+__pycache__
+*.pyo
 *.pyc
 
 *.stdout
 *.stderr
 
 .literature
-.test
+pangene-test/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..fc52181
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,5 @@
+repos:
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: "v3.1.0"
+    hooks:
+      - id: prettier
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 0000000..24a3687
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1,19 @@
+includes/Maven_Pro/
+
+# gitignore
+.nextflow*
+work/
+results/
+.DS_Store
+*.code-workspace
+.screenrc
+.*.sw?
+__pycache__
+*.pyo
+*.pyc
+
+*.stdout
+*.stderr
+
+.literature
+pangene-test/
diff --git a/.prettierrc.yml b/.prettierrc.yml
new file mode 100644
index 0000000..c81f9a7
--- /dev/null
+++ b/.prettierrc.yml
@@ -0,0 +1 @@
+printWidth: 120
diff --git a/assets/rrna-db-defaults.txt b/assets/rrna-db-defaults.txt
index e2bc4e6..4223356 100644
--- a/assets/rrna-db-defaults.txt
+++ b/assets/rrna-db-defaults.txt
@@ -5,4 +5,4 @@ https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/s
 https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-bac-16s-id90.fasta
 https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-bac-23s-id98.fasta
 https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-18s-id95.fasta
-https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-28s-id98.fasta
\ No newline at end of file
+https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-28s-id98.fasta
diff --git a/assets/rrna-db-test.txt b/assets/rrna-db-test.txt
index 16504bb..20116f9 100644
--- a/assets/rrna-db-test.txt
+++ b/assets/rrna-db-test.txt
@@ -1 +1 @@
-https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-28s-id98.fasta
\ No newline at end of file
+https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-28s-id98.fasta
diff --git a/bin/make-samplesheet.py b/bin/make-samplesheet.py
index bc39f55..b4ad0b7 100755
--- a/bin/make-samplesheet.py
+++ b/bin/make-samplesheet.py
@@ -282,4 +282,4 @@ def main():
         make_samplesheet_from_command(input_path_or_command, exp_name)
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/conf/base.config b/conf/base.config
index 2a6c2fe..5f02f17 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -8,7 +8,7 @@ profiles {
             envWhitelist    = 'APPTAINER_BINDPATH,APPTAINER_BIND'
         }
     }
-    
+
     local {
         process {
             executor        = 'local'
diff --git a/conf/manifest.config b/conf/manifest.config
index 706052c..fd7c8f6 100644
--- a/conf/manifest.config
+++ b/conf/manifest.config
@@ -7,4 +7,4 @@ manifest {
     nextflowVersion         = '!>=23.04.4'
     version                 = '0.1'
     doi                     = ''
-}
\ No newline at end of file
+}
diff --git a/conf/reporting_defaults.config b/conf/reporting_defaults.config
index 5df9469..178522d 100644
--- a/conf/reporting_defaults.config
+++ b/conf/reporting_defaults.config
@@ -10,4 +10,4 @@ report {
 trace {
     enabled                 = true
     file                    = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt"
-}
\ No newline at end of file
+}
diff --git a/conf/test_params.json b/conf/test_params.json
index fef1871..0dc25f8 100644
--- a/conf/test_params.json
+++ b/conf/test_params.json
@@ -1,34 +1,28 @@
 {
-  "target_assemblies": [
-    ["red5_v2p1", ".test/target/red5_v2p1_chr1_1200k.fasta.gz"],
-    ["donghong", ".test/target/donghong_chr1_600k.fsa.gz"]
-  ],
+    "target_assemblies": [
+        ["red5_v2p1", "pangene-test/target/red5_v2p1_chr1_1200k.fasta.gz"],
+        ["donghong", "pangene-test/target/donghong_chr1_600k.fsa.gz"]
+    ],
 
-  "te_libraries": [["donghong", ".test/te_lib/donghong.TElib.fa.gz"]],
+    "te_libraries": [["donghong", "pangene-test/te_lib/donghong.TElib.fa.gz"]],
 
-  "samplesheet": ".test/samplesheet/samplesheet.csv",
+    "samplesheet": "pangene-test/samplesheet/samplesheet.csv",
 
-  "remove_ribo_rna": true,
-  "ribo_database_manifest":"assets/rrna-db-test.txt",
+    "remove_ribo_rna": true,
+    "ribo_database_manifest": "assets/rrna-db-test.txt",
 
-  "external_protein_fastas": [
-    ".test/ext_prot/RU01_20221115150135_chr1_600k.pep.fasta.gz",
-    ".test/ext_prot/RU01_20221115150135_chr2_600k.pep.fasta.gz"
-  ],
+    "external_protein_fastas": [
+        "pangene-test/ext_prot/RU01_20221115150135_chr1_600k.pep.fasta.gz",
+        "pangene-test/ext_prot/RU01_20221115150135_chr2_600k.pep.fasta.gz"
+    ],
 
-  "braker_extra_args": "--testMode --species=arabidopsis --useexisting",
+    "braker_extra_args": "--testMode --species=arabidopsis --useexisting",
 
-  "liftoff_xref_annotations": [
-    [
-      ".test/liftoff/Russell_V2a_chr1_600k.fsa.gz",
-      ".test/liftoff/Russell_V2a_chr1_600k.gff3.gz"
+    "liftoff_xref_annotations": [
+        ["pangene-test/liftoff/Russell_V2a_chr1_600k.fsa.gz", "pangene-test/liftoff/Russell_V2a_chr1_600k.gff3.gz"],
+        ["pangene-test/liftoff/TAIR10_chr1_600k.fas.gz", "pangene-test/liftoff/TAIR10_chr1_600k.gff3.gz"]
     ],
-    [
-      ".test/liftoff/TAIR10_chr1_600k.fas.gz",
-      ".test/liftoff/TAIR10_chr1_600k.gff3.gz"
-    ]
-  ],
 
-  "max_cpus": 2,
-  "max_memory": "3.GB"
+    "max_cpus": 2,
+    "max_memory": "3.GB"
 }
diff --git a/main.nf b/main.nf
index 7fe5247..9ed32f7 100755
--- a/main.nf
+++ b/main.nf
@@ -10,4 +10,4 @@ workflow {
 
 workflow PFR_PANGENE {
     PANGENE()
-}
\ No newline at end of file
+}
diff --git a/modules.json b/modules.json
index a645b68..4e8f0a9 100644
--- a/modules.json
+++ b/modules.json
@@ -1,152 +1,152 @@
 {
-  "name": "PlantandFoodResearch/pangene",
-  "homePage": "https://github.com/PlantandFoodResearch/pangene",
-  "repos": {
-    "git@github.com:PlantandFoodResearch/nxf-modules.git": {
-      "modules": {
-        "pfr": {
-          "custom/restoregffids": {
-            "branch": "main",
-            "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
-            "installed_by": ["fasta_edta_lai", "modules"]
-          },
-          "custom/shortenfastaids": {
-            "branch": "main",
-            "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
-            "installed_by": ["fasta_edta_lai", "modules"]
-          },
-          "edta/edta": {
-            "branch": "main",
-            "git_sha": "35468dbb1f35eb17a43d7e05544601c7c3f8cd90",
-            "installed_by": ["fasta_edta_lai", "modules"]
-          },
-          "lai": {
-            "branch": "main",
-            "git_sha": "7e6e3cb41362a045c6bb6065903efa0eba246e87",
-            "installed_by": ["fasta_edta_lai"]
-          },
-          "liftoff": {
-            "branch": "main",
-            "git_sha": "444b35f4e6285115f84d2bfce49fc0e6d8a2754e",
-            "installed_by": ["modules"]
-          },
-          "repeatmodeler/builddatabase": {
-            "branch": "main",
-            "git_sha": "9da0567f685b2772f65290f2bd6d6347671c8310",
-            "installed_by": ["modules"]
-          },
-          "repeatmodeler/repeatmodeler": {
-            "branch": "main",
-            "git_sha": "9da0567f685b2772f65290f2bd6d6347671c8310",
-            "installed_by": ["modules"]
-          }
+    "name": "PlantandFoodResearch/pangene",
+    "homePage": "https://github.com/PlantandFoodResearch/pangene",
+    "repos": {
+        "git@github.com:PlantandFoodResearch/nxf-modules.git": {
+            "modules": {
+                "pfr": {
+                    "custom/restoregffids": {
+                        "branch": "main",
+                        "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
+                        "installed_by": ["fasta_edta_lai", "modules"]
+                    },
+                    "custom/shortenfastaids": {
+                        "branch": "main",
+                        "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
+                        "installed_by": ["fasta_edta_lai", "modules"]
+                    },
+                    "edta/edta": {
+                        "branch": "main",
+                        "git_sha": "35468dbb1f35eb17a43d7e05544601c7c3f8cd90",
+                        "installed_by": ["fasta_edta_lai", "modules"]
+                    },
+                    "lai": {
+                        "branch": "main",
+                        "git_sha": "7e6e3cb41362a045c6bb6065903efa0eba246e87",
+                        "installed_by": ["fasta_edta_lai"]
+                    },
+                    "liftoff": {
+                        "branch": "main",
+                        "git_sha": "444b35f4e6285115f84d2bfce49fc0e6d8a2754e",
+                        "installed_by": ["modules"]
+                    },
+                    "repeatmodeler/builddatabase": {
+                        "branch": "main",
+                        "git_sha": "9da0567f685b2772f65290f2bd6d6347671c8310",
+                        "installed_by": ["modules"]
+                    },
+                    "repeatmodeler/repeatmodeler": {
+                        "branch": "main",
+                        "git_sha": "9da0567f685b2772f65290f2bd6d6347671c8310",
+                        "installed_by": ["modules"]
+                    }
+                }
+            },
+            "subworkflows": {
+                "pfr": {
+                    "fasta_edta_lai": {
+                        "branch": "main",
+                        "git_sha": "5ae026a98da1331433fa4cf5b667c9abdf43e395",
+                        "installed_by": ["subworkflows"]
+                    }
+                }
+            }
+        },
+        "git@github.com:kherronism/nf-modules.git": {
+            "modules": {
+                "kherronism": {
+                    "braker3": {
+                        "branch": "dev",
+                        "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
+                        "installed_by": ["modules"]
+                    },
+                    "repeatmasker": {
+                        "branch": "dev",
+                        "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
+                        "installed_by": ["modules"]
+                    }
+                }
+            }
+        },
+        "https://github.com/nf-core/modules.git": {
+            "modules": {
+                "nf-core": {
+                    "cat/cat": {
+                        "branch": "master",
+                        "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
+                        "installed_by": ["modules"]
+                    },
+                    "cat/fastq": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": ["modules"]
+                    },
+                    "custom/dumpsoftwareversions": {
+                        "branch": "master",
+                        "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93",
+                        "installed_by": ["modules"]
+                    },
+                    "fastavalidator": {
+                        "branch": "master",
+                        "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382",
+                        "installed_by": ["modules"]
+                    },
+                    "fastp": {
+                        "branch": "master",
+                        "git_sha": "d086322563bdbb08c94bf15a7db58a39ccdb1520",
+                        "installed_by": ["fastq_fastqc_umitools_fastp"]
+                    },
+                    "fastqc": {
+                        "branch": "master",
+                        "git_sha": "617777a807a1770f73deb38c80004bac06807eef",
+                        "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
+                    },
+                    "gffread": {
+                        "branch": "master",
+                        "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8",
+                        "installed_by": ["modules"]
+                    },
+                    "gunzip": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": ["modules"]
+                    },
+                    "samtools/cat": {
+                        "branch": "master",
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": ["modules"]
+                    },
+                    "sortmerna": {
+                        "branch": "master",
+                        "git_sha": "ce558e30784469b88a16923ca96d81899d240b42",
+                        "installed_by": ["modules"]
+                    },
+                    "star/align": {
+                        "branch": "master",
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": ["modules"]
+                    },
+                    "star/genomegenerate": {
+                        "branch": "master",
+                        "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a",
+                        "installed_by": ["modules"]
+                    },
+                    "umitools/extract": {
+                        "branch": "master",
+                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
+                        "installed_by": ["fastq_fastqc_umitools_fastp"]
+                    }
+                }
+            },
+            "subworkflows": {
+                "nf-core": {
+                    "fastq_fastqc_umitools_fastp": {
+                        "branch": "master",
+                        "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
+                        "installed_by": ["subworkflows"]
+                    }
+                }
+            }
         }
-      },
-      "subworkflows": {
-        "pfr": {
-          "fasta_edta_lai": {
-            "branch": "main",
-            "git_sha": "5ae026a98da1331433fa4cf5b667c9abdf43e395",
-            "installed_by": ["subworkflows"]
-          }
-        }
-      }
-    },
-    "git@github.com:kherronism/nf-modules.git": {
-      "modules": {
-        "kherronism": {
-          "braker3": {
-            "branch": "dev",
-            "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6",
-            "installed_by": ["modules"]
-          },
-          "repeatmasker": {
-            "branch": "dev",
-            "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb",
-            "installed_by": ["modules"]
-          }
-        }
-      }
-    },
-    "https://github.com/nf-core/modules.git": {
-      "modules": {
-        "nf-core": {
-          "cat/cat": {
-            "branch": "master",
-            "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
-            "installed_by": ["modules"]
-          },
-          "cat/fastq": {
-            "branch": "master",
-            "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-            "installed_by": ["modules"]
-          },
-          "custom/dumpsoftwareversions": {
-            "branch": "master",
-            "git_sha": "37dee863936732fe7e05dc598bf6e183a8e7ef73",
-            "installed_by": ["modules"]
-          },
-          "fastavalidator": {
-            "branch": "master",
-            "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382",
-            "installed_by": ["modules"]
-          },
-          "fastp": {
-            "branch": "master",
-            "git_sha": "d086322563bdbb08c94bf15a7db58a39ccdb1520",
-            "installed_by": ["fastq_fastqc_umitools_fastp"]
-          },
-          "fastqc": {
-            "branch": "master",
-            "git_sha": "617777a807a1770f73deb38c80004bac06807eef",
-            "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
-          },
-          "gffread": {
-            "branch": "master",
-            "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8",
-            "installed_by": ["modules"]
-          },
-          "gunzip": {
-            "branch": "master",
-            "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-            "installed_by": ["modules"]
-          },
-          "samtools/cat": {
-            "branch": "master",
-            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-            "installed_by": ["modules"]
-          },
-          "sortmerna": {
-            "branch": "master",
-            "git_sha": "ce558e30784469b88a16923ca96d81899d240b42",
-            "installed_by": ["modules"]
-          },
-          "star/align": {
-            "branch": "master",
-            "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-            "installed_by": ["modules"]
-          },
-          "star/genomegenerate": {
-            "branch": "master",
-            "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a",
-            "installed_by": ["modules"]
-          },
-          "umitools/extract": {
-            "branch": "master",
-            "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
-            "installed_by": ["fastq_fastqc_umitools_fastp"]
-          }
-        }
-      },
-      "subworkflows": {
-        "nf-core": {
-          "fastq_fastqc_umitools_fastp": {
-            "branch": "master",
-            "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
-            "installed_by": ["subworkflows"]
-          }
-        }
-      }
     }
-  }
 }
diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
index f0c63f6..9b3272b 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml
+++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
@@ -4,4 +4,4 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - bioconda::multiqc=1.17
+  - bioconda::multiqc=1.19
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
index 7685b33..f218761 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/main.nf
+++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf
@@ -4,8 +4,8 @@ process CUSTOM_DUMPSOFTWAREVERSIONS {
     // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' :
-        'biocontainers/multiqc:1.17--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
+        'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
 
     input:
     path versions
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
index 29e7244..5f59a93 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
@@ -2,7 +2,7 @@
     "Should run without failures": {
         "content": [
             [
-                "versions.yml:md5,3843ac526e762117eedf8825b40683df"
+                "versions.yml:md5,76d454d92244589d32455833f7c1ba6d"
             ],
             [
                 "data: \"<style>\\n#nf-core-versions tbody:nth-child(even) {\\n    background-color: #f2f2f2;\\n\\",
@@ -10,8 +10,8 @@
                 "  >\\n    <thead>\\n        <tr>\\n            <th> Process Name </th>\\n            <th>\\",
                 "  \\ Software </th>\\n            <th> Version  </th>\\n        </tr>\\n    </thead>\\n\\",
                 "  \\n<tbody>\\n<tr>\\n    <td><samp>CUSTOM_DUMPSOFTWAREVERSIONS</samp></td>\\n    <td><samp>python</samp></td>\\n\\",
-                "  \\    <td><samp>3.12.0</samp></td>\\n</tr>\\n\\n<tr>\\n    <td><samp></samp></td>\\n \\",
-                "  \\   <td><samp>yaml</samp></td>\\n    <td><samp>6.0.1</samp></td>\\n</tr>\\n\\n</tbody>\\n\\",
+                "  \\    <td><samp>3.11.7</samp></td>\\n</tr>\\n\\n<tr>\\n    <td><samp></samp></td>\\n \\",
+                "  \\   <td><samp>yaml</samp></td>\\n    <td><samp>5.4.1</samp></td>\\n</tr>\\n\\n</tbody>\\n\\",
                 "  <tbody>\\n<tr>\\n    <td><samp>TOOL1</samp></td>\\n    <td><samp>tool1</samp></td>\\n\\",
                 "  \\    <td><samp>0.11.9</samp></td>\\n</tr>\\n\\n</tbody>\\n<tbody>\\n<tr>\\n    <td><samp>TOOL2</samp></td>\\n\\",
                 "  \\    <td><samp>tool2</samp></td>\\n    <td><samp>1.9</samp></td>\\n</tr>\\n\\n</tbody>\\n\\",
@@ -19,8 +19,8 @@
             ],
             [
                 "CUSTOM_DUMPSOFTWAREVERSIONS:",
-                "  python: 3.12.0",
-                "  yaml: 6.0.1",
+                "  python: 3.11.7",
+                "  yaml: 5.4.1",
                 "TOOL1:",
                 "  tool1: 0.11.9",
                 "TOOL2:",
@@ -28,6 +28,6 @@
                 "Workflow:"
             ]
         ],
-        "timestamp": "2024-01-05T00:18:43.461970077"
+        "timestamp": "2024-01-09T23:01:18.710682"
     }
 }
\ No newline at end of file
diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config
index c866f5a..628f5fc 100644
--- a/modules/nf-core/umitools/extract/tests/nextflow.config
+++ b/modules/nf-core/umitools/extract/tests/nextflow.config
@@ -1,7 +1,7 @@
 process {
 
     publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
-    
+
     withName: UMITOOLS_EXTRACT {
         ext.args = '--bc-pattern="NNNN"'
     }
diff --git a/pangene_local b/pangene_local
index 8e8e692..255edb9 100755
--- a/pangene_local
+++ b/pangene_local
@@ -1,16 +1,20 @@
 #!/usr/bin/env bash
 
+NO_FORMAT="\033[0m"
+C_RED="\033[38;5;9m"
+F_BOLD="\033[1m"
+
 [[ $1 == '-stub' ]] \
     && stub='-stub' \
     || stub=''
 
 [[ $1 == '-stub' ]] \
     && echo 'Executing with -stub' \
-    || echo 'Executing without -stub'
+    || echo -e "${C_RED}${F_BOLD}Executing without -stub${NO_FORMAT}"
 
 nextflow \
     main.nf \
     -profile local,docker \
     -resume \
     $stub \
-    -params-file conf/test_params.json
\ No newline at end of file
+    -params-file conf/test_params.json
diff --git a/pangene_pfr b/pangene_pfr
index ca1a335..608798c 100644
--- a/pangene_pfr
+++ b/pangene_pfr
@@ -19,4 +19,4 @@ export APPTAINER_BINDPATH="$APPTAINER_BINDPATH,$TMPDIR:$TMPDIR,$TMPDIR:/tmp"
 nextflow \
     main.nf \
     -profile pfr,apptainer \
-    -resume
\ No newline at end of file
+    -resume
diff --git a/subworkflows/local/align_rnaseq.nf b/subworkflows/local/align_rnaseq.nf
index 544fea9..05cd2fa 100644
--- a/subworkflows/local/align_rnaseq.nf
+++ b/subworkflows/local/align_rnaseq.nf
@@ -6,7 +6,7 @@ workflow ALIGN_RNASEQ {
     reads_target                // channel: [ meta, assembly_id ]
     trim_reads                  // channel: [ meta, [ fq ] ]
     assembly_index              // channel: [ meta2, star_index ]
-    
+
     main:
     ch_versions                 = Channel.empty()
 
@@ -33,7 +33,7 @@ workflow ALIGN_RNASEQ {
     def star_ignore_sjdbgtf     = true
     def seq_platform            = false
     def seq_center              = false
-    
+
     STAR_ALIGN(
         ch_star_inputs.map { meta, fastq, index -> [ meta, fastq ] },
         ch_star_inputs.map { meta, fastq, index -> [ [ id: meta.target_assembly ], index ] },
@@ -67,10 +67,10 @@ workflow ALIGN_RNASEQ {
                                 | mix(
                                     ch_star_bam_branch.bam
                                 )
-    
+
     ch_versions                 = ch_versions.mix(SAMTOOLS_CAT.out.versions.first())
-    
+
     emit:
     bam                         = ch_samtools_bam   // channel: [ [ id, single_end, target_assembly ], [ bam ] ]
     versions                    = ch_versions       // channel: [ versions.yml ]
-}
\ No newline at end of file
+}
diff --git a/subworkflows/local/extract_samples.nf b/subworkflows/local/extract_samples.nf
index 75437da..947c0b7 100644
--- a/subworkflows/local/extract_samples.nf
+++ b/subworkflows/local/extract_samples.nf
@@ -29,13 +29,13 @@ workflow EXTRACT_SAMPLES {
     | set { ch_reads }
 
     reads = ch_reads.map { meta, fastq -> [[id:meta.id, single_end:meta.single_end], fastq]}
-    
+
     ch_reads
     | flatMap { meta, fastq ->
         meta.target_assemblies.collect { assembly -> [[id:meta.id, single_end:meta.single_end], assembly] }
     }
     | set { assemblies }
-    
+
     emit:
     reads                                       // channel: [ val(meta), [ reads ] ]
     assemblies                                  // channel: [ val(meta), val(assembly) ]
@@ -68,4 +68,4 @@ def create_fastq_channel(LinkedHashMap row, sheetPath) {
     }
 
     return fastq_meta
-}
\ No newline at end of file
+}
diff --git a/subworkflows/local/fasta_liftoff.nf b/subworkflows/local/fasta_liftoff.nf
index 8952e1d..4c59ba3 100644
--- a/subworkflows/local/fasta_liftoff.nf
+++ b/subworkflows/local/fasta_liftoff.nf
@@ -8,7 +8,7 @@ workflow FASTA_LIFTOFF {
     target_assemby                  // Channel: [ meta, fasta ]
     xref_fasta                      // Channel: [ meta2, fasta ]
     xref_gff                        // Channel: [ meta2, gff3 ]
-    
+
     main:
     ch_versions                     = Channel.empty()
 
@@ -20,12 +20,12 @@ workflow FASTA_LIFTOFF {
                                     }
 
     GUNZIP_FASTA ( ch_xref_fasta_branch.gz )
-    
+
     ch_xref_gunzip_fasta            = GUNZIP_FASTA.out.gunzip
                                     | mix(
                                         ch_xref_fasta_branch.rest
                                     )
-    
+
     ch_versions                     = ch_versions.mix(GUNZIP_FASTA.out.versions.first())
 
     // MODULE: GUNZIP as GUNZIP_GFF
@@ -57,7 +57,7 @@ workflow FASTA_LIFTOFF {
                                     | join(ch_gffread_inputs)
                                     | map { fid, gffread_gff, meta, gff -> [ meta, gffread_gff ] }
                                     // meta insertion
-    
+
     ch_versions                     = ch_versions.mix(GFFREAD.out.versions.first())
 
     // MODULE: LIFTOFF
@@ -89,10 +89,10 @@ workflow FASTA_LIFTOFF {
     ch_liftoff_gff3                 = LIFTOFF.out.polished_gff3
                                     | map { meta, gff -> [ [ id: meta.target_assemby ], gff ] }
                                     | groupTuple
-    
+
     ch_versions                     = ch_versions.mix(LIFTOFF.out.versions.first())
 
     emit:
     gff3        = ch_liftoff_gff3               // [ meta, [ gff3 ] ]
     versions    = ch_versions                   // [ versions.yml ]
-}
\ No newline at end of file
+}
diff --git a/subworkflows/local/prepare_assembly.nf b/subworkflows/local/prepare_assembly.nf
index 9fc6244..d18f5ce 100644
--- a/subworkflows/local/prepare_assembly.nf
+++ b/subworkflows/local/prepare_assembly.nf
@@ -74,7 +74,7 @@ workflow PREPARE_ASSEMBLY {
     ch_edta_inputs              = repeat_annotator != 'edta'
                                 ? Channel.empty()
                                 : ch_annotator_inputs
-    
+
     FASTA_EDTA_LAI(
         ch_edta_inputs,
         [],
@@ -89,12 +89,12 @@ workflow PREPARE_ASSEMBLY {
                                 : ch_annotator_inputs
 
     REPEATMODELER_BUILDDATABASE ( ch_repeatmodeler_inputs )
-    
+
     ch_versions                 = ch_versions.mix(REPEATMODELER_BUILDDATABASE.out.versions.first())
 
     // MODULE: REPEATMODELER_REPEATMODELER
     REPEATMODELER_REPEATMODELER ( REPEATMODELER_BUILDDATABASE.out.db )
-    
+
     ch_assembly_and_te_lib      = ch_validated_assembly
                                 | join(
                                     repeat_annotator == 'edta'
@@ -103,7 +103,7 @@ workflow PREPARE_ASSEMBLY {
                                 )
 
     ch_versions                 = ch_versions.mix(REPEATMODELER_REPEATMODELER.out.versions.first())
-    
+
     // MODULE: REPEATMASKER
     REPEATMASKER(
         ch_assembly_and_te_lib.map { meta, assembly, teLib -> [meta, assembly] },
@@ -120,10 +120,10 @@ workflow PREPARE_ASSEMBLY {
 
     ch_assembly_index           = STAR_GENOMEGENERATE.out.index
     ch_versions                 = ch_versions.mix(STAR_GENOMEGENERATE.out.versions.first())
-    
+
     emit:
     target_assemby              = ch_validated_assembly         // channel: [ meta, fasta ]
     masked_target_assembly      = REPEATMASKER.out.fasta_masked // channel: [ meta, fasta ]
     target_assemby_index        = ch_assembly_index             // channel: [ meta, star_index ]
     versions                    = ch_versions                   // channel: [ versions.yml ]
-}
\ No newline at end of file
+}
diff --git a/subworkflows/local/prepare_ext_prots.nf b/subworkflows/local/prepare_ext_prots.nf
index fff42ae..ee65f77 100644
--- a/subworkflows/local/prepare_ext_prots.nf
+++ b/subworkflows/local/prepare_ext_prots.nf
@@ -4,7 +4,7 @@ include { CAT_CAT as CAT_PROTEIN_FASTAS } from '../../modules/nf-core/cat/cat'
 workflow PREPARE_EXT_PROTS {
     take:
     ch_ext_prot_fastas          // Channel: [ meta, fasta ]
-    
+
     main:
     ch_versions                 = Channel.empty()
 
@@ -14,22 +14,22 @@ workflow PREPARE_EXT_PROTS {
                                     gz: "$file".endsWith(".gz")
                                     rest: !"$file".endsWith(".gz")
                                 }
-    
+
     GUNZIP ( ch_ext_prot_seqs_branch.gz )
-    
+
     ch_ext_prot_gunzip_fastas   = GUNZIP.out.gunzip.mix(ch_ext_prot_seqs_branch.rest)
                                 | map { meta, filePath -> filePath }
                                 | collect
                                 | map { fileList -> [ [ id: "ext_protein_seqs" ], fileList ] }
-    
+
     ch_versions                 = ch_versions.mix(GUNZIP.out.versions.first())
 
     // MODULE: CAT_CAT as CAT_PROTEIN_FASTAS
     CAT_PROTEIN_FASTAS ( ch_ext_prot_gunzip_fastas )
 
     ch_versions                 = ch_versions.mix(CAT_PROTEIN_FASTAS.out.versions)
-    
+
     emit:
     ext_prots_fasta             = CAT_PROTEIN_FASTAS.out.file_out   // Channel: [ meta, fasta ]
     versions                    = ch_versions                       // Channel: [ versions.yml ]
-}
\ No newline at end of file
+}
diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf
index ba444bb..9466104 100644
--- a/subworkflows/local/preprocess_rnaseq.nf
+++ b/subworkflows/local/preprocess_rnaseq.nf
@@ -13,10 +13,10 @@ workflow PREPROCESS_RNASEQ {
     min_trimmed_reads               // val: Integer
     remove_ribo_rna                 // val: true|false
     sortmerna_fastas                // channel: [ [ fasta ] ]
-    
+
     main:
     ch_versions = Channel.empty()
-    
+
     // SUBWORKFLOW: EXTRACT_SAMPLES
     EXTRACT_SAMPLES(
         samplesheet,
@@ -55,7 +55,7 @@ workflow PREPROCESS_RNASEQ {
     def with_umi                    = false
     def skip_umi_extract            = true
     def umi_discard_read            = false
-    
+
     FASTQ_FASTQC_UMITOOLS_FASTP (
         ch_cat_fastq,
         skip_fastqc,
@@ -86,16 +86,16 @@ workflow PREPROCESS_RNASEQ {
         remove_ribo_rna ? ch_trim_reads : Channel.empty(),
         sortmerna_fastas
     )
-    
+
     ch_emitted_reads                = remove_ribo_rna
                                     ? SORTMERNA.out.reads
                                     : ch_trim_reads
     ch_versions                     = ch_versions.mix(SORTMERNA.out.versions.first())
 
-    
+
 
     emit:
     trim_reads                      = ch_emitted_reads  // channel: [ meta, [ fq ] ]
     reads_target                    = ch_reads_target   // channel: [ meta, assembly_id ]
     versions                        = ch_versions       // channel: [ versions.yml ]
-}
\ No newline at end of file
+}
diff --git a/workflows/pangene.nf b/workflows/pangene.nf
index 72b9fd6..8512ff9 100644
--- a/workflows/pangene.nf
+++ b/workflows/pangene.nf
@@ -26,7 +26,7 @@ workflow PANGENE {
     ch_samplesheet              = params.samplesheet
                                 ? Channel.fromPath(params.samplesheet, checkIfExists: true)
                                 : Channel.empty()
-    
+
     ch_tar_assm_str             = Channel.of(
                                     params.target_assemblies
                                     .collect { tag, fastaPath -> tag.strip() }.join(",")
@@ -49,7 +49,7 @@ workflow PANGENE {
                                     [ [ id: fileHandle.getSimpleName() ], fileHandle]
                                 }
                                 : Channel.empty()
-    
+
     ch_xref_mm                  = params.liftoff_xref_annotations
                                 ? Channel.fromList(params.liftoff_xref_annotations)
                                 | multiMap { fasta, gff ->
@@ -116,7 +116,7 @@ workflow PANGENE {
                                     ch_ext_prots_fasta.map { meta, filePath -> filePath }.ifEmpty(null)
                                 )
                                 | map { meta, fasta, bam, prots -> [ meta, fasta, bam ?: [], prots ?: [] ] }
-    
+
     def rnaseq_sets_dirs        = []
     def rnaseq_sets_ids         = []
     def hintsfile               = []
@@ -147,4 +147,4 @@ workflow PANGENE {
     CUSTOM_DUMPSOFTWAREVERSIONS (
         ch_versions.unique().collectFile(name: 'collated_versions.yml')
     )
-}
\ No newline at end of file
+}

From a4ada59767eca00d286e00b98d157df0974304d6 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 11 Jan 2024 12:07:00 +1300
Subject: [PATCH 57/59] Updated manifest

---
 conf/manifest.config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/manifest.config b/conf/manifest.config
index fd7c8f6..95537cd 100644
--- a/conf/manifest.config
+++ b/conf/manifest.config
@@ -1,10 +1,10 @@
 manifest {
     name                    = 'pangene'
-    author                  = """Usman Rashid"""
+    author                  = """Usman Rashid, Jason Shiller"""
     homePage                = 'https://github.com/PlantandFoodResearch/pan-gene'
     description             = """A NextFlow pipeline for pan-genome annotation"""
     mainScript              = 'main.nf'
     nextflowVersion         = '!>=23.04.4'
-    version                 = '0.1'
+    version                 = '0.2'
     doi                     = ''
 }

From 4de72ab9f1fdbadc43bcc2d2959f76ac944da662 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 11 Jan 2024 13:13:08 +1300
Subject: [PATCH 58/59] Fixed linting errors

---
 modules/local/samplesheet_check/main.nf |  4 ++--
 modules/local/validate_params.nf        | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/modules/local/samplesheet_check/main.nf b/modules/local/samplesheet_check/main.nf
index f0437a6..4fb60f8 100644
--- a/modules/local/samplesheet_check/main.nf
+++ b/modules/local/samplesheet_check/main.nf
@@ -3,7 +3,7 @@
 // MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
 //
 // Changes:
-// Added channel permissible_target_assemblies 
+// Added channel permissible_target_assemblies
 
 process SAMPLESHEET_CHECK {
     tag "$samplesheet"
@@ -37,4 +37,4 @@ process SAMPLESHEET_CHECK {
         python: \$(python --version | sed 's/Python //g')
     END_VERSIONS
     """
-}
\ No newline at end of file
+}
diff --git a/modules/local/validate_params.nf b/modules/local/validate_params.nf
index f6ce18a..460ce80 100644
--- a/modules/local/validate_params.nf
+++ b/modules/local/validate_params.nf
@@ -1,6 +1,6 @@
 def validateParams(params) {
     validateFastaTags(params)
-    
+
     if (!params['repeat_annotator']) {
         error "Error: repeat_annotator must be either 'repeatmodeler' or 'edta'"
     }
@@ -8,7 +8,7 @@ def validateParams(params) {
     if ( !(params['repeat_annotator'] in ['repeatmodeler', 'edta']) ) {
         error "Error: repeat_annotator must be either 'repeatmodeler' or 'edta'"
     }
-    
+
     validateTETags(params)
     validateTEFastaCorrespondence(params)
 
@@ -67,7 +67,7 @@ def validateTEFastaCorrespondence(params) {
     if(!params["te_libraries"]) {
         return
     }
-    
+
     def listOfTETuples   = params["te_libraries"]
     def listOfFastaTuples   = params["target_assemblies"]
 
@@ -84,7 +84,7 @@ def validateTEFastaCorrespondence(params) {
 def validateRiboDBManifest(params) {
     if (params.remove_ribo_rna) {
         file_ribo_db = file(params.ribo_database_manifest, checkIfExists: true)
-        
+
         if (file_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${file_ribo_db.getName()}!"}
     }
 }
@@ -101,4 +101,4 @@ def validateLiftoffXrefs(params) {
 
 def isNotListOfLists(thisOne, subListSize) {
     return (!(thisOne instanceof List) || thisOne.isEmpty() || thisOne.any { !(it instanceof List) || it.size() != subListSize })
-}
\ No newline at end of file
+}

From 12968f44c1423c6cd8ecbf9628a4a2b0813a74c1 Mon Sep 17 00:00:00 2001
From: Usman Rashid <usman@smme.edu.pk>
Date: Thu, 11 Jan 2024 14:09:44 +1300
Subject: [PATCH 59/59] Updated base config for docker

---
 conf/base.config | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 5f02f17..6b0d419 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -23,9 +23,7 @@ profiles {
 
     docker {
         docker.enabled      = true
-        docker.userEmulation= false
-        docker.fixOwnership = true
-        docker.runOptions   = '--platform=linux/amd64'
+        docker.runOptions   = '-u $(id -u):$(id -g) --platform=linux/amd64'
         docker.registry     = 'quay.io'
     }
 }