Skip to content

Commit

Permalink
Merge pull request #135 from TORCH-Consortium/develop
Browse files Browse the repository at this point in the history
Add samplesheet validation
  • Loading branch information
abhi18av authored Nov 6, 2022
2 parents fa4a5c4 + 10e1aea commit c9fa840
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 52 deletions.
3 changes: 2 additions & 1 deletion bin/samplesheet_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
fail = True

if not fail:
print('No errors found')
print('Samplesheet validation checks passed')
exit(0)
else:
print('Samplesheet validation checks failed')
exit(1)
2 changes: 1 addition & 1 deletion conf/conda_local.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ conda {
process {

withName:
'GATK.*|LOFREQ.*|DELLY.*|TBPROFILER.*|MULTIQC.*|FASTQC.*|UTILS.*|FASTQ.*' {
'GATK.*|LOFREQ.*|DELLY.*|TBPROFILER.*|MULTIQC.*|FASTQC.*|UTILS.*|FASTQ.*|SAMPLESHEET.*' {
//environment does exist:
conda = "${params.conda_envs_location}/xbs-nf-env-1"

Expand Down
2 changes: 1 addition & 1 deletion conf/docker.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ process {
//----------------------------------------------

withName:
'GATK.*|LOFREQ.*|DELLY.*|TBPROFILER.*|MULTIQC.*|FASTQC.*|UTILS.*|FASTQ.*' {
'GATK.*|LOFREQ.*|DELLY.*|TBPROFILER.*|MULTIQC.*|FASTQC.*|UTILS.*|FASTQ.*|SAMPLESHEET.*' {
container = "rg.nl-ams.scw.cloud/xbs-nf-containers/xbs-nf-container-1:0.9.8"
}

Expand Down
49 changes: 2 additions & 47 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,50 +13,6 @@ include { MERGE_WF } from './workflows/merge_wf.nf'
include { QUALITY_CHECK_WF } from './workflows/quality_check_wf.nf'
include { REPORTS_WF } from './workflows/reports_wf.nf'

//================================================================================
// Prepare channels
//================================================================================


//NOTE: Expected structure of input CSV samplesheet
// 0 1 2 3 4 5 6 7 8
// Study,Sample,Library,Attempt,R1,R2,Flowcell,Lane,Index Sequence

reads_ch = Channel.fromPath(params.input_samplesheet)
.splitCsv(header: false, skip: 1)
.map { row -> {
study = row[0]
sample = row[1]
library = row[2]
attempt = row[3]
read1 = row[4]
read2 = row[5]
flowcell = row[6]
lane = row[7]
index_sequence = row[8]

//NOTE: Platform is hard-coded to illumina
bam_rg_string ="@RG\\tID:${flowcell}.${lane}\\tSM:${study}.${sample}\\tPL:illumina\\tLB:lib${library}\\tPU:${flowcell}.${lane}.${index_sequence}"

unique_sample_id = "${study}.${sample}.L${library}.A${attempt}.${flowcell}.${lane}.${index_sequence}"

//Accommodate single/multi reads
if (read1 && read2) {

return tuple(unique_sample_id, bam_rg_string, tuple(file(read1), file(read2)))

} else if (read1) {

return tuple(unique_sample_id, bam_rg_string, tuple(file(read1)))

} else {

return tuple(unique_sample_id, bam_rg_string, tuple(file(read2)))

}
}
}


//================================================================================
// Main workflow
Expand All @@ -66,11 +22,11 @@ workflow {

if (params.only_validate_fastqs) {

VALIDATE_FASTQS_WF(reads_ch)
VALIDATE_FASTQS_WF(params.input_samplesheet)

} else {

validated_reads_ch = VALIDATE_FASTQS_WF(reads_ch)
validated_reads_ch = VALIDATE_FASTQS_WF(params.input_samplesheet)

QUALITY_CHECK_WF(validated_reads_ch)

Expand All @@ -79,7 +35,6 @@ workflow {

MULTIPLE_INFECTIONS_WF(MAP_WF.out.rejected_sorted_reads_ch)


CALL_WF(MAP_WF.out.approved_sorted_reads_ch)

collated_gvcfs_ch = CALL_WF.out.gvcf_ch
Expand Down
14 changes: 14 additions & 0 deletions modules/utils/samplesheet_validation.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Runs the samplesheet_validation.py command on the given samplesheet and
// re-emits that same samplesheet as the process output, so consumers of the
// output channel only receive it once the validation command has run.
process SAMPLESHEET_VALIDATION {

input:
// Samplesheet file to be checked; staged into the task work directory.
path(samplesheet)

output:
// Declared with the same name as the input: the samplesheet itself is the output.
path(samplesheet)

// NOTE(review): the task outcome depends on the exit status of the command
// below — presumably a non-zero exit (validation failure) fails the task under
// Nextflow's default error strategy; confirm no errorStrategy override applies.
script:

"""
samplesheet_validation.py ${samplesheet}
"""
}
44 changes: 42 additions & 2 deletions workflows/validate_fastqs_wf.nf
Original file line number Diff line number Diff line change
@@ -1,13 +1,53 @@
include { FASTQ_VALIDATOR } from '../modules/fastq_utils/validator.nf' addParams ( params.FASTQ_VALIDATOR )
include { UTILS_FASTQ_COHORT_VALIDATION } from '../modules/utils/fastq_cohort_validation.nf' addParams ( params.UTILS_FASTQ_COHORT_VALIDATION )
include { SAMPLESHEET_VALIDATION } from '../modules/utils/samplesheet_validation.nf'

workflow VALIDATE_FASTQS_WF {
take:
reads_ch
samplesheet

main:
SAMPLESHEET_VALIDATION(samplesheet)

//NOTE: Expected structure of input CSV samplesheet
// 0 1 2 3 4 5 6 7 8
// Study,Sample,Library,Attempt,R1,R2,Flowcell,Lane,Index Sequence

reads_ch = SAMPLESHEET_VALIDATION.out
.splitCsv(header: false, skip: 1)
.map { row -> {
study = row[0]
sample = row[1]
library = row[2]
attempt = row[3]
read1 = row[4]
read2 = row[5]
flowcell = row[6]
lane = row[7]
index_sequence = row[8]

//NOTE: Platform is hard-coded to illumina
bam_rg_string ="@RG\\tID:${flowcell}.${lane}\\tSM:${study}.${sample}\\tPL:illumina\\tLB:lib${library}\\tPU:${flowcell}.${lane}.${index_sequence}"

unique_sample_id = "${study}.${sample}.L${library}.A${attempt}.${flowcell}.${lane}.${index_sequence}"

//Accommodate single/multi reads
if (read1 && read2) {

return tuple(unique_sample_id, bam_rg_string, tuple(file(read1), file(read2)))

} else if (read1) {

return tuple(unique_sample_id, bam_rg_string, tuple(file(read1)))

} else {

return tuple(unique_sample_id, bam_rg_string, tuple(file(read2)))

}
}
}

//FIXME: Add the samplesheet validator process for samplesheet_validation.py script

FASTQ_VALIDATOR(reads_ch)

Expand Down

0 comments on commit c9fa840

Please sign in to comment.