nextflow.config

/*
 * Workflow metadata
 */

manifest {
    // Pipeline identity
    name = "PARANOiD"
    description = "Pipeline for Automated Read ANalysis Of iCLIP Data"
    author = "Patrick Barth"
    version = "1.0.0"

    // Entry script and default branch of the project
    mainScript = "main.nf"
    defaultBranch = "main"

    // NOTE(review): a bare version string (no '>=' prefix) may be interpreted
    // as an exact-version requirement -- confirm that pinning to this single
    // release is intended.
    nextflowVersion = "23.10.0"
}

/*
Parameters
*/

params {
	// ---------------------------------------------------------------
	// General
	// ---------------------------------------------------------------
	split_fastq_by = 1000000
	speed = false
	// PATH: directory in which output is supposed to be stored
	output = "./output/"
	// INT: minimum mapq-score needed to retain an alignment -> soon to be deprecated
	mapq = 2

	// ---------------------------------------------------------------
	// Non-essential input files
	// ---------------------------------------------------------------
	annotation = 'NO_FILE'

	// ---------------------------------------------------------------
	// Preprocessing
	// ---------------------------------------------------------------
	// INT: minimum length of reads required after preprocessing
	min_length = 30
	// INT: minimum quality of a nucleotide required to retain it
	min_qual = 20
	// INT: minimum percentage of nucleotides in a read required to have
	// their quality above params.min_qual to retain the read
	min_percent_qual_filter = 90

	// ---------------------------------------------------------------
	// Barcode handling
	// ---------------------------------------------------------------
	// STRING: barcode pattern -> N = random barcode; X = experimental barcode
	barcode_pattern = "NNNNNXXXXXXNNNN"
	// INT: maximum number of mismatches in the experimental barcode sequence
	// to still allocate the read
	barcode_mismatches = 1

	// ---------------------------------------------------------------
	// Alignment
	// ---------------------------------------------------------------
	// STRING: decides if bowtie2 or STAR is used -> pro = bowtie2; eu = STAR
	domain = "pro"
	max_alignments = 1
	report_all_alignments = false

	// ---------------------------------------------------------------
	// Replicate merging
	// ---------------------------------------------------------------
	merge_replicates = false
	correlation_analysis = false
	combine_strands_correlation = false

	// ---------------------------------------------------------------
	// Transcript analysis
	// ---------------------------------------------------------------
	// BOOLEAN: decides if top X sequences from the reference are presented in the output
	map_to_transcripts = false
	// INT: number of top transcripts presented when params.map_to_transcripts = true
	number_top_transcripts = 10

	// ---------------------------------------------------------------
	// Peak calling
	// ---------------------------------------------------------------
	// BOOLEAN: decides if peak calling via pureclip takes place after normal processing
	omit_peak_calling = false
	// BOOLEAN: adds arguments to PureCLIP which allow the tool to run with
	// BAM files containing coverages all over the reference
	peak_calling_for_high_coverage = false
	peak_calling_regions = false
	peak_calling_regions_width = 8

	// ---------------------------------------------------------------
	// RNA subtype distribution
	// ---------------------------------------------------------------
	// STRING: name of gene_id used within the annotation file
	gene_id = "ID"
	// STRING: color used for barplots
	color_barplot = "#69b3a2"
	// STRING: RNA-subtypes used for the distribution of CL-sites
	// Alternative subtype list kept for reference:
	//rna_subtypes = 'lnc_RNA,miRNA,mRNA,ncRNA,rRNA,snoRNA,snRNA,tRNA'
	rna_subtypes = '3_prime_UTR,transcript,5_prime_UTR'

	// ---------------------------------------------------------------
	// Peak distance analysis
	// ---------------------------------------------------------------
	omit_peak_distance = false
	// INT: percentile that decides which cl-sites are considered when
	// calculating distances and extracting sequences
	percentile = 90
	// INT: maximum distance to check for distances between cl-sites
	distance = 30

	// ---------------------------------------------------------------
	// Sequence extraction and motif search
	// ---------------------------------------------------------------
	omit_sequence_extraction = false
	// INT: length to both sides of cl-sites from which nucleotides are recovered
	seq_len = 20
	omit_cl_nucleotide = false
	// INT: number of nucleotides to be replaced with ends on both sides of the cl-site
	omit_cl_width = 0
	// BOOL
	remove_overlaps = false
	// INT: max number of motifs to search for
	max_motif_num = 50
	// INT: minimum motif width to report, >=3
	min_motif_width = 8
	// INT: maximum motif width to report, <= 30
	max_motif_width = 15
}

/*
 * Fixed parameters 
*/

// Make the manifest scope defined in this file reachable as params.manifest.
params.manifest   = manifest
// Default value for params.help.
// NOTE(review): presumably toggles a usage message in the main script -- verify there.
params.help       = false

/*
 * Selectable profiles: one group chooses the process executor
 * (sge, slurm, local), the other enables a container engine
 * (singularity, docker, podman).
 */
profiles {
	// ---- executors ----
	sge {
		process.executor = 'sge'
		process.shell = ['/usr/bin/env', 'bash']
		process.queue = 'all.q'
		process.clusterOptions = '-V -S /bin/bash'
		process.penv = 'multislot'
	}
	slurm {
		process.executor = 'slurm'
	}
	local {
		process.executor = 'local'
	}

	// ---- container engines ----
	singularity {
		singularity.enabled = true
		singularity.autoMounts = true
	}
	docker {
		docker.enabled = true
		// Must carry the `docker.` scope prefix: an unscoped `runOptions`
		// would only create an unrelated top-level key and the options
		// would never be handed to the docker command line.
		// Single quotes keep $(...) literal so it is expanded by the shell
		// at run time, not by the config parser.
		docker.runOptions = '-u $(id -u):$(id -g)'
	}
	podman {
		podman.enabled = true
	}
}

/*
 * Per-process resources and container images, assigned through
 * withName selectors. Selectors joined with '|' share one setting block.
 *
 * NOTE(review): most images carry a 'docker://' prefix while the pureCLIP
 * image does not -- confirm both forms resolve under every container
 * profile that is in use.
 */
process {
	withName: 'quality_control|quality_control_2' {
		container = 'docker://pbarth/fastqc:1.0'
		cpus = 1
		memory = '1 GB'
	}

	withName: 'adapter_removal' {
		container = 'docker://pbarth/trim_galore:1.0'
		cpus = 1
		memory = '5 GB'
	}

	withName: 'quality_filter' {
		container = 'docker://pbarth/fastx:1.0'
		cpus = 1
		memory = '1 GB'
	}

	withName: 'remove_exp_barcode' {
		container = 'docker://pbarth/fastx:1.0'
		cpus = 1
		memory = '100 MB'
	}

	withName: 'extract_rnd_barcode' {
		container = 'docker://pbarth/umi-tools:1.0'
		cpus = 1
		memory = '1 GB'
	}

	withName: 'deduplicate' {
		container = 'docker://pbarth/umi-tools:1.0'
		cpus = 1
		// No memory setting is active for this process; the disabled value is kept below.
		//memory = '100 GB'
	}

	withName: 'check_barcode_file' {
		container = 'docker://pbarth/checkbarcode:1.0'
		cpus = 1
		memory = '200 MB'
	}

	withName: 'get_length_exp_barcode|merge_preprocessed_reads|get_top_hits|remove_newlines|extract_top_transcript_sequences|collect_experiments_without_alignments|collect_subtype_analysis_errors|collect_workflow_metrics' {
		container = 'docker://pbarth/base:1.0'
		cpus = 1
		memory = '500 MB'
	}

	withName: 'merge_deduplicated_bam|count_hits|filter_empty_bams|sort_bam|sort_bam_before_strand_pref|determine_strand_preference|index_alignments|extract_top_alignments|index_for_peak_calling|sort_and_index_alignment|get_chromosome_sizes' {
		container = 'docker://pbarth/samtools:1.0'
		cpus = 1
		memory = '10 GB'
	}

	withName: 'split_bam_by_chromosome' {
		container = 'docker://pbarth/bamtools:1.0'
		cpus = 1
		memory = '500 MB'
	}

	withName: 'merge_wigs' {
		container = 'docker://pbarth/merge:1.0.1'
		cpus = 1
		memory = '20 GB'
	}

	withName: 'wig_to_bigWig|bigWig_to_bedgraph' {
		container = 'docker://pbarth/wigtobigwig:1.0'
		cpus = 1
		memory = '500 MB'
	}

	withName: 'split_exp_barcode' {
		container = 'docker://pbarth/fastx:1.0'
		cpus = 1
		memory = '500 MB'
	}

	withName: 'build_index_bowtie|mapping_bowtie' {
		container = 'docker://pbarth/bowtie2:1.0'
		cpus = 4
		memory = '5 GB'
	}

	withName: 'build_index_STAR|mapping_STAR' {
		container = 'docker://pbarth/star:1.0'
		cpus = 8
		memory = '100 GB'
	}

	withName: 'calculate_crosslink_sites|split_wig_2_for_peak_height_hist|split_wig2_for_correlation' {
		container = 'docker://pbarth/determine_cl_sites:1.0'
		cpus = 1
		memory = '10 GB'
	}

	withName: 'feature_counts' {
		container = 'docker://pbarth/subread:1.0'
		cpus = 1
		memory = '1 GB'
	}

	withName: 'pureCLIP' {
		container = 'quay.io/biocontainers/pureclip:1.3.1--0'
		cpus = 8
		memory = '50 GB'
	}

	withName: 'pureCLIP_to_wig' {
		container = 'docker://pbarth/wig-to-wig2:1.0'
		cpus = 1
		memory = '1 GB'
	}

	withName: 'multiqc' {
		container = 'docker://pbarth/multiqc:1.0'
		cpus = 1
		memory = '10 GB'
	}

	withName: 'wig_to_bam' {
		container = 'docker://pbarth/wig2bam:1.0'
		cpus = 1
		memory = '10 GB'
	}

	withName: 'get_RNA_subtypes_distribution' {
		container = 'docker://pbarth/rna_subtypes_distribution:1.0.1'
		cpus = 1
		memory = '10 GB'
	}

	withName: 'generate_RNA_subtypes_barplot|plot_peak_distance|generate_barcode_barplot|visualize_strand_preference|generate_peak_height_histogram' {
		container = 'docker://pbarth/rplots:1.0.3'
		cpus = 1
		memory = '5 GB'
	}

	withName: 'calc_wig_correlation' {
		container = 'docker://pbarth/rplots:1.0.3'
		cpus = 1
		memory = '10 GB'
	}

	withName: 'sequence_extraction' {
		container = 'docker://pbarth/sequence_extraction:1.0.1'
		cpus = 1
		memory = '500 MB'
	}

	withName: 'prepare_annotation_for_igv' {
		container = 'docker://pbarth/igvprep:1.0'
		cpus = 1
		memory = '5 GB'
	}

	withName: 'generate_igv_session' {
		container = 'docker://pbarth/igvsession:1.0'
		cpus = 1
		memory = '1 GB'
	}

	withName: 'motif_search' {
		container = 'docker://pbarth/meme:1.0'
		cpus = 1
		memory = '5 GB'
	}

	withName: 'calculate_peak_distance' {
		container = 'docker://pbarth/peak_distance:1.0'
		cpus = 1
		memory = '2 GB'
	}
}