Skip to content

Commit

Permalink
Fix one-to-many situation
Browse files Browse the repository at this point in the history
  • Loading branch information
nakib103 authored and nuno-agostinho committed Jul 4, 2023
1 parent 6e1af6e commit 78c86d6
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 8 deletions.
3 changes: 2 additions & 1 deletion nextflow/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ The following config files are used and can be modified depending on user requir
--vep_config FILENAME VEP config file. Alternatively, can also be a directory containing VEP INI files. Default: vep_config/vep.ini
--cpus INT Number of CPUs to use. Default: 1
--outdir DIRNAME Name of output directory. Default: outdir
--output_prefix PREFIX Output filename prefix. The generated output file will have name <vcf>-<output_prefix>.vcf.gz
--output_prefix PREFIX Output filename prefix. The generated output file will be named <output_prefix>_VEP.vcf.gz.
NOTE: Do not use this parameter if you are expecting multiple output files.
--skip_check [0,1] Skip check for tabix index file of input VCF. Enables use of cache with -resume. Default: 0
```
Expand Down
9 changes: 7 additions & 2 deletions nextflow/nf_modules/merge_VCF.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,19 @@ process mergeVCF {
cache 'lenient'

input:
tuple val(original_vcf), path(vcf_files), path(index_files), path(vep_config), val(index_type)
val output_dir
tuple val(original_vcf), path(vcf_files), path(index_files), val(vep_config), val(index_type),
val(one_to_many),
val(output_dir)

output:
val("${output_dir}/${merged_vcf}")

script:
merged_vcf = merged_vcf ?: file(original_vcf).getName().replace(".vcf", "_VEP.vcf")
merged_vcf = one_to_many ? merged_vcf.replace(
"_VEP.vcf",
"_" + file(vep_config).getName().replace(".ini", "") + "_VEP.vcf"
) : merged_vcf
index_flag = index_type == "tbi" ? "-t" : "-c"

"""
Expand Down
2 changes: 1 addition & 1 deletion nextflow/nf_modules/run_vep.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ process runVEP {
tuple val(original_vcf), path(vcf), path(vcf_index), path(vep_config), val(index_type)

output:
tuple val(original_vcf), path(out_vcf), path("${out_vcf}.{tbi,csi}"), path(vep_config), val(index_type), emit: files
tuple val(original_vcf), path(out_vcf), path("${out_vcf}.{tbi,csi}"), val("${vep_config}"), val(index_type), emit: files
path("*.vcf.gz_summary.*")

script:
Expand Down
17 changes: 13 additions & 4 deletions nextflow/workflows/run_vep.nf
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,19 @@ workflow vep {
}

// convert output dir to absolute path if necessary
output_dir = toAbsolute(output_dir)
output_dir = Channel.fromPath(toAbsolute(output_dir))

// flag whether this is a one-to-many situation (a single VCF and multiple INI files);
// in this situation we produce output files with different names
one_to_many = vcf.count()
.combine( vep_config.count() )
.map{ it[0] == 1 && it[1] != 1 }

// process input and create Channel
// this works like the 'merge' operator and thus might make the pipeline un-resumable;
// we might consider using 'toSortedList' and generating appropriate input from the 'processInput' module
processInput(vcf.combine(vep_config))

// Prepare input VCF files (bgzip + tabix)
checkVCF(processInput.out)

Expand All @@ -121,9 +127,12 @@ workflow vep {

// Run VEP for each split VCF file and for each VEP config
runVEP(splitVCF.out.transpose())

// Merge split VCF files (creates one output VCF for each combination of input VCF and VEP config)
mergeVCF(runVEP.out.files.groupTuple(by: [0, 3, 4]), output_dir)
mergeVCF(runVEP.out.files.groupTuple(by: [0, 3, 4])
.combine(one_to_many
.combine(output_dir))
)
emit:
mergeVCF.out
}
Expand Down

0 comments on commit 78c86d6

Please sign in to comment.