-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Filtering short contig lengths before annotation #128
base: dev
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,35 @@ include { GENEPAL } from './workflows/genepal' | |
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_genepal_pipeline' | ||
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_genepal_pipeline' | ||
|
||
/* | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
PROCESS: Filter Genome Assembly by Minimum Contig Length | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
*/ | ||
|
||
process SEQKIT_GET_LENGTH { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we use existing nf-core modules instead of a custom local module? Nonetheless, custom local modules should be placed in the |
||
tag "${meta.id}" | ||
label 'process_medium' | ||
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container | ||
? 'https://depot.galaxyproject.org/singularity/seqkit:2.4.0--h9ee0642_0' | ||
: 'quay.io/biocontainers/seqkit:2.4.0--h9ee0642_0'}" | ||
|
||
input: | ||
tuple val(meta), path(genome_fasta) | ||
|
||
output: | ||
tuple val(meta), path("filtered_${meta.id}.fasta"), path("${meta.id}_contig_list.txt"), emit: filtered_fasta | ||
|
||
script: | ||
""" | ||
# Filter contigs based on length and output filtered FASTA | ||
seqkit seq --min-len ${params.min_contig_length} ${genome_fasta} > filtered_${meta.id}.fasta | ||
|
||
# Generate a list of filtered contigs | ||
seqkit fx2tab --length --name filtered_${meta.id}.fasta | awk '{print \$1}' > ${meta.id}_contig_list.txt | ||
""" | ||
} | ||
|
||
/* | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
NAMED WORKFLOWS FOR PIPELINE | ||
|
@@ -48,10 +77,15 @@ workflow PLANTFOODRESEARCHOPEN_GENEPAL { | |
|
||
main: | ||
// | ||
// WORKFLOW: Run pipeline | ||
// Filter genome assembly by minimum contig length | ||
// | ||
SEQKIT_GET_LENGTH(ch_target_assembly) | ||
|
||
// | ||
// Run GENEPAL main workflow using filtered FASTA | ||
// | ||
GENEPAL( | ||
ch_target_assembly, | ||
SEQKIT_GET_LENGTH.out.filtered_fasta.map { meta, fasta, contig_list -> [ meta, fasta ] }, // Filtered genome FASTA | ||
ch_tar_assm_str, | ||
ch_is_masked, | ||
ch_te_library, | ||
|
@@ -68,9 +102,11 @@ workflow PLANTFOODRESEARCHOPEN_GENEPAL { | |
ch_tsebra_config, | ||
ch_orthofinder_pep | ||
) | ||
|
||
emit: | ||
multiqc_report = GENEPAL.out.multiqc_report // channel: /path/to/multiqc_report.html | ||
} | ||
|
||
/* | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
RUN MAIN WORKFLOW | ||
|
@@ -81,9 +117,9 @@ workflow { | |
|
||
main: | ||
// | ||
// SUBWORKFLOW: Run initialisation tasks | ||
// SUBWORKFLOW: Run initialization tasks | ||
// | ||
PIPELINE_INITIALISATION ( | ||
PIPELINE_INITIALISATION( | ||
params.version, | ||
params.monochrome_logs, | ||
args, | ||
|
@@ -95,10 +131,15 @@ workflow { | |
) | ||
|
||
// | ||
// WORKFLOW: Run main workflow | ||
// Filter genome assembly by minimum contig length | ||
// | ||
SEQKIT_GET_LENGTH(PIPELINE_INITIALISATION.out.target_assembly) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
// | ||
// Run main workflow using filtered FASTA | ||
// | ||
PLANTFOODRESEARCHOPEN_GENEPAL( | ||
PIPELINE_INITIALISATION.out.target_assembly, | ||
SEQKIT_GET_LENGTH.out.filtered_fasta, | ||
PIPELINE_INITIALISATION.out.tar_assm_str, | ||
PIPELINE_INITIALISATION.out.is_masked, | ||
PIPELINE_INITIALISATION.out.te_library, | ||
|
@@ -115,10 +156,11 @@ workflow { | |
PIPELINE_INITIALISATION.out.tsebra_config, | ||
PIPELINE_INITIALISATION.out.orthofinder_pep | ||
) | ||
|
||
// | ||
// SUBWORKFLOW: Run completion tasks | ||
// | ||
PIPELINE_COMPLETION ( | ||
PIPELINE_COMPLETION( | ||
params.email, | ||
params.email_on_fail, | ||
params.plaintext_email, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ params { | |
orthofinder_annotations = null | ||
outdir = null | ||
email = null | ||
min_contig_length = 5000 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we move this to the |
||
|
||
// Repeat annotation options | ||
repeat_annotator = 'repeatmodeler' | ||
|
@@ -79,7 +80,15 @@ params { | |
custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" | ||
|
||
} | ||
|
||
// Validation for the min_contig_length parameter | ||
process { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is quite clever. However, we are using nf-schema for parameter validation, which means that the parameter type and constraints are defined in a schema file and the plugin automatically validates all the parameters. The schema file is here: https://github.com/Plant-Food-Research-Open/genepal/blob/dev/nextflow_schema.json This schema can be automatically generated and refined through a web-based GUI. Please see the nf-core docs: https://nf-co.re/docs/nf-core-tools/pipelines/schema |
||
beforeScript = """ | ||
if [[ ${params.min_contig_length} -le 1000 ]]; then | ||
echo "ERROR: The parameter 'min_contig_length' must be greater than 5 kbp (5000 base pairs). Provided value: ${params.min_contig_length}" >&2 | ||
exit 1 | ||
fi | ||
""" | ||
} | ||
// Max resources | ||
process { | ||
resourceLimits = [ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Parameter documentation is auto-generated with the following command:
`nf-core -v pipelines schema docs > docs/parameters.md`
The parameters are documented in the `docs/parameters.md` file.