diff --git a/bin/samplesheet_validation.py b/bin/samplesheet_validation.py index b8360304..115bd1b9 100755 --- a/bin/samplesheet_validation.py +++ b/bin/samplesheet_validation.py @@ -27,7 +27,8 @@ fail = True if not fail: - print('No errors found') + print('Samplesheet validation checks passed') exit(0) else: + print('Samplesheet validation checks failed') exit(1) diff --git a/conf/conda_local.config b/conf/conda_local.config index fb8dacfd..4ae87445 100644 --- a/conf/conda_local.config +++ b/conf/conda_local.config @@ -9,7 +9,7 @@ conda { process { withName: - 'GATK.*|LOFREQ.*|DELLY.*|TBPROFILER.*|MULTIQC.*|FASTQC.*|UTILS.*|FASTQ.*' { + 'GATK.*|LOFREQ.*|DELLY.*|TBPROFILER.*|MULTIQC.*|FASTQC.*|UTILS.*|FASTQ.*|SAMPLESHEET.*' { //environment does exist: conda = "${params.conda_envs_location}/xbs-nf-env-1" diff --git a/conf/docker.config b/conf/docker.config index f9d8650f..0f6f3bd3 100644 --- a/conf/docker.config +++ b/conf/docker.config @@ -5,7 +5,7 @@ process { //---------------------------------------------- withName: - 'GATK.*|LOFREQ.*|DELLY.*|TBPROFILER.*|MULTIQC.*|FASTQC.*|UTILS.*|FASTQ.*' { + 'GATK.*|LOFREQ.*|DELLY.*|TBPROFILER.*|MULTIQC.*|FASTQC.*|UTILS.*|FASTQ.*|SAMPLESHEET.*' { container = "rg.nl-ams.scw.cloud/xbs-nf-containers/xbs-nf-container-1:0.9.8" } diff --git a/main.nf b/main.nf index 941443ec..f0dd35f2 100644 --- a/main.nf +++ b/main.nf @@ -13,50 +13,6 @@ include { MERGE_WF } from './workflows/merge_wf.nf' include { QUALITY_CHECK_WF } from './workflows/quality_check_wf.nf' include { REPORTS_WF } from './workflows/reports_wf.nf' -//================================================================================ -// Prepare channels -//================================================================================ - - -//NOTE: Expected structure of input CSV samplesheet -// 0 1 2 3 4 5 6 7 8 -// Study,Sample,Library,Attempt,R1,R2,Flowcell,Lane,Index Sequence - -reads_ch = Channel.fromPath(params.input_samplesheet) - .splitCsv(header: false, skip: 1) 
- .map { row -> { - study = row[0] - sample = row[1] - library = row[2] - attempt = row[3] - read1 = row[4] - read2 = row[5] - flowcell = row[6] - lane = row[7] - index_sequence = row[8] - - //NOTE: Platform is hard-coded to illumina - bam_rg_string ="@RG\\tID:${flowcell}.${lane}\\tSM:${study}.${sample}\\tPL:illumina\\tLB:lib${library}\\tPU:${flowcell}.${lane}.${index_sequence}" - - unique_sample_id = "${study}.${sample}.L${library}.A${attempt}.${flowcell}.${lane}.${index_sequence}" - - //Accomodate single/multi reads - if (read1 && read2) { - - return tuple(unique_sample_id, bam_rg_string, tuple(file(read1), file(read2))) - - } else if (read1) { - - return tuple(unique_sample_id, bam_rg_string, tuple(file(read1))) - - } else { - - return tuple(unique_sample_id, bam_rg_string, tuple(file(read2))) - - } - } - } - //================================================================================ // Main workflow @@ -66,11 +22,11 @@ workflow { if (params.only_validate_fastqs) { - VALIDATE_FASTQS_WF(reads_ch) + VALIDATE_FASTQS_WF(params.input_samplesheet) } else { - validated_reads_ch = VALIDATE_FASTQS_WF(reads_ch) + validated_reads_ch = VALIDATE_FASTQS_WF(params.input_samplesheet) QUALITY_CHECK_WF(validated_reads_ch) @@ -79,7 +35,6 @@ workflow { MULTIPLE_INFECTIONS_WF(MAP_WF.out.rejected_sorted_reads_ch) - CALL_WF(MAP_WF.out.approved_sorted_reads_ch) collated_gvcfs_ch = CALL_WF.out.gvcf_ch diff --git a/modules/utils/samplesheet_validation.nf b/modules/utils/samplesheet_validation.nf new file mode 100644 index 00000000..c71c47ee --- /dev/null +++ b/modules/utils/samplesheet_validation.nf @@ -0,0 +1,14 @@ +process SAMPLESHEET_VALIDATION { + + input: + path(samplesheet) + + output: + path(samplesheet) + + script: + + """ + samplesheet_validation.py ${samplesheet} + """ +} diff --git a/workflows/validate_fastqs_wf.nf b/workflows/validate_fastqs_wf.nf index 1bc8b86f..c9e0676f 100644 --- a/workflows/validate_fastqs_wf.nf +++ b/workflows/validate_fastqs_wf.nf @@ -1,13 +1,53 
@@ include { FASTQ_VALIDATOR } from '../modules/fastq_utils/validator.nf' addParams ( params.FASTQ_VALIDATOR ) include { UTILS_FASTQ_COHORT_VALIDATION } from '../modules/utils/fastq_cohort_validation.nf' addParams ( params.UTILS_FASTQ_COHORT_VALIDATION ) +include { SAMPLESHEET_VALIDATION } from '../modules/utils/samplesheet_validation.nf' workflow VALIDATE_FASTQS_WF { take: - reads_ch + samplesheet main: + SAMPLESHEET_VALIDATION(samplesheet) + + //NOTE: Expected structure of input CSV samplesheet + // 0 1 2 3 4 5 6 7 8 + // Study,Sample,Library,Attempt,R1,R2,Flowcell,Lane,Index Sequence + + reads_ch = SAMPLESHEET_VALIDATION.out + .splitCsv(header: false, skip: 1) + .map { row -> { + study = row[0] + sample = row[1] + library = row[2] + attempt = row[3] + read1 = row[4] + read2 = row[5] + flowcell = row[6] + lane = row[7] + index_sequence = row[8] + + //NOTE: Platform is hard-coded to illumina + bam_rg_string ="@RG\\tID:${flowcell}.${lane}\\tSM:${study}.${sample}\\tPL:illumina\\tLB:lib${library}\\tPU:${flowcell}.${lane}.${index_sequence}" + + unique_sample_id = "${study}.${sample}.L${library}.A${attempt}.${flowcell}.${lane}.${index_sequence}" + + //Accommodate single/multi reads + if (read1 && read2) { + + return tuple(unique_sample_id, bam_rg_string, tuple(file(read1), file(read2))) + + } else if (read1) { + + return tuple(unique_sample_id, bam_rg_string, tuple(file(read1))) + + } else { + + return tuple(unique_sample_id, bam_rg_string, tuple(file(read2))) + + } + } + } - //FIXME: Add the samplesheet validator process for samplesheet_validation.py script FASTQ_VALIDATOR(reads_ch)