-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.nf
108 lines (74 loc) · 3.21 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env nextflow
// Start-up banner, written to the Nextflow log once when the script is evaluated.
def banner = """\
=========================================
COMPARE TE (v1.0)
-----------------------------------------
Authors:
- Chris Wyatt <[email protected]>
- Rahia Mashoodh <>
-----------------------------------------
Copyright (c) 2024
=========================================""".stripIndent()
log.info(banner)
include { DOWNLOAD_NCBI } from './modules/local/download_ncbi.nf'
include { GFFREAD } from './modules/local/gffread.nf'
include { ORTHOFINDER } from './modules/local/orthofinder.nf'
include { EARLGREY } from './modules/local/earlgrey.nf'
include { HITE } from './modules/local/hite.nf'
include { validateParameters; paramsHelp; paramsSummaryLog } from 'plugin/nf-validation'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from './modules/nf-core/custom/dumpsoftwareversions/main'
/*
 * Entry workflow.
 *
 * Reads the samplesheet given by --input (CSV, no header) and routes each row
 * by its number of columns:
 *   - 2 columns, second column ends with a FASTA extension -> genome-only input,
 *     sent straight to the TE annotators (EARLGREY / HITE);
 *   - 2 columns, anything else in the second column        -> passed to DOWNLOAD_NCBI;
 *   - 3 columns (name, fasta, gff)                         -> user-supplied files,
 *     sent to GFFREAD.
 * Rows with any other number of columns match no branch and are dropped.
 *
 * Optional steps are switched on with params.orthofinder, params.earlgrey and
 * params.hite.
 */
workflow {

    if (params.help) {
        log.info paramsHelp("nextflow run main.nf --input input_file.csv")
        exit 0
    }

    // Fail early, with a readable message, when no samplesheet was supplied.
    if (params.input == null) {
        error "No input file provided. Please supply a CSV samplesheet with --input (run with --help for usage)."
    }
    in_file = Channel.fromPath(params.input)

    // Route samplesheet rows by column count.
    in_file
        .splitCsv()
        .branch {
            ncbi: it.size() == 2
            path: it.size() == 3
        }
        .set { input_type }

    twocol = input_type.ncbi

    def fastaExtensions = ['.fa', '.fasta', '.fna', '.fa.gz', '.fasta.gz', '.fna.gz']

    // Two-column rows are split on the second column: a value ending in a FASTA
    // extension is treated as a genome file, anything else goes to DOWNLOAD_NCBI.
    genomeonly = twocol.filter { row ->
        def filePath = row[1] // assuming the file path is the second column
        fastaExtensions.any { filePath.endsWith(it) }
    }

    refseqids = twocol.filter { row ->
        def filePath = row[1] // assuming the file path is the second column
        !fastaExtensions.any { filePath.endsWith(it) }
    }

    // Channel collecting the versions output of each process.
    // NOTE(review): nothing in this workflow consumes ch_versions yet
    // (CUSTOM_DUMPSOFTWAREVERSIONS is included but never invoked).
    ch_versions = Channel.empty()

    // Validate input parameters --- ##need to add with nf-core schema build. !
    //validateParameters()

    // Print summary of supplied parameters
    log.info paramsSummaryLog(workflow)

    DOWNLOAD_NCBI ( refseqids )
    ch_versions = ch_versions.mix(DOWNLOAD_NCBI.out.versions.first())

    // Values starting with "s3" are kept as given; every other fasta/gff value is
    // converted to an absolute path. The variables are declared with `def` so they
    // stay local to each closure invocation instead of leaking into the shared
    // script binding.
    input_type.path
        .map { name, fasta, gff ->
            def full_fasta = (fasta =~ /^s3/) ? fasta : new File(fasta).getAbsolutePath()
            def full_gff   = (gff =~ /^s3/)   ? gff   : new File(gff).getAbsolutePath()
            [name, full_fasta, full_gff]
        }
        .set { local_full_tuple }

    // GFFREAD receives the DOWNLOAD_NCBI genome output together with the
    // path-normalised user tuples (not the raw samplesheet rows).
    GFFREAD ( DOWNLOAD_NCBI.out.genome.mix(local_full_tuple) )
    ch_versions = ch_versions.mix(GFFREAD.out.versions.first())

    merge_ch = GFFREAD.out.longest.collect()

    if (params.orthofinder) {
        ORTHOFINDER ( merge_ch )
    }

    // The TE annotators run on the genomes emitted by GFFREAD plus the
    // genome-only rows from the samplesheet.
    if (params.earlgrey) {
        EARLGREY ( GFFREAD.out.just_genome.mix(genomeonly) )
    }

    if (params.hite) {
        HITE ( GFFREAD.out.just_genome.mix(genomeonly) )
    }
}