Skip to content

Commit

Permalink
Merge pull request #10 from fcaretti/Add-rules
Browse files Browse the repository at this point in the history
feat(rule): add VEP annotation (no wrapper)
  • Loading branch information
fcaretti authored Jun 21, 2024
2 parents c49124c + 2c72e6d commit 757940d
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 40 deletions.
11 changes: 10 additions & 1 deletion .test/config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,13 @@ known_sites:
filename: "placeholder.vcf"

filtering:
params: ""
params: ""

# Settings consumed by workflow/rules/vep.smk when the test workflow runs.
vep:
# Directory that receives the downloaded archive and is passed to vep as
# --dir_cache. It must not be empty: the rules build "{cache_dir}/{zip_name}"
# and run "mkdir -p {cache_dir}", both of which break on an empty string.
cache_dir: "resources/vep_cache"
# File name of the archive stored inside cache_dir.
zip_name: "homo_sapiens_vep_112_GRCh38.tar.gz"
# Location the archive is downloaded from.
url: "https://ftp.ensembl.org/pub/release-112/variation/indexed_vep_cache/homo_sapiens_vep_112_GRCh38.tar.gz"
# Container image in which the annotation rule runs.
image: "docker://ensemblorg/ensembl-vep:release_112.0"
# NOTE(review): filters and impact_levels are not read by any rule shown in
# workflow/rules/vep.smk — confirm where they are consumed.
filters: "--filter "
impact_levels: [ "MODERATE", "HIGH"]
# Joined with cache_dir as "{cache_dir}/{species}" to name the unpacked cache.
species: "homo_sapiens"
12 changes: 7 additions & 5 deletions config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ filtering:
params: "ex: -i 'QUAL > 30 && INFO/DP > 10'"

vep:
cache_dir: "cache_folder"
plugins_dir: "plugins_folder"
species: ""
build: ""
release: ""
# Values beginning with "ex: " look like illustrative examples rather than
# usable defaults — presumably they must be replaced before a run; confirm.
# cache_dir: directory holding the downloaded archive (download_vep_cache)
# and handed to vep as --dir_cache (vep_annotation).
cache_dir: ""
# zip_name: file name of the archive written inside cache_dir.
zip_name: "ex: homo_sapiens_vep_112_GRCh38.tar.gz"
# url: location the archive is fetched from with curl.
url: "ex: https://ftp.ensembl.org/pub/release-112/variation/indexed_vep_cache/homo_sapiens_vep_112_GRCh38.tar.gz"
# image: container image in which the vep_annotation rule runs.
image: "ex: docker://ensemblorg/ensembl-vep:release_112.0"
# NOTE(review): filters and impact_levels are not read by any rule shown in
# workflow/rules/vep.smk — confirm where they are consumed.
filters: "--filter "
impact_levels: [ "MODERATE", "HIGH"]
# species: joined with cache_dir as "{cache_dir}/{species}" to name the
# unpacked cache directory.
species: "ex: homo_sapiens"
3 changes: 2 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ haplo_calls = "results/calls/calls_gatk.vcf"

rule all:
input:
"results/calls/filtered_calls.vcf",
"results/calls/annotated_calls.vcf",
first_summaries,
second_summaries,

Expand All @@ -52,3 +52,4 @@ include: "rules/recalibration.smk"
include: "rules/alignment_summary.smk"
include: "rules/gatk_haplocaller.smk"
include: "rules/filter.smk"
include: "rules/vep.smk"
5 changes: 5 additions & 0 deletions workflow/envs/curl.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conda environment providing curl; referenced by the download_vep_cache rule
# in workflow/rules/vep.smk. Named after the tool it actually installs.
name: curl
channels:
- conda-forge
dependencies:
- curl=8.8.0
7 changes: 7 additions & 0 deletions workflow/envs/unzip.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment referenced by the unzip_vep_cache rule
# (conda: "../envs/unzip.yml" in workflow/rules/vep.smk).
# NOTE(review): that rule's shell command only calls tar; confirm that htslib
# is the dependency intended here.
name: unzip
channels:
- bioconda
- conda-forge
- defaults
dependencies:
- htslib=1.19.1
83 changes: 50 additions & 33 deletions workflow/rules/vep.smk
Original file line number Diff line number Diff line change
@@ -1,43 +1,60 @@
rule annotate_variants:
input:
calls="results/calls/calls_gatk.vcf", # .vcf, .vcf.gz or .bcf
cache=config["vep"]["cache_dir"], # can be omitted if fasta and gff are specified
plugins=config["vep"]["plugins_dir"],
fasta=reference,
fai=reference_idx,
# Download the VEP cache archive named by config["vep"]["url"] into
# "{cache_dir}/{zip_name}". curl output (stdout and stderr) is appended to the
# rule's log file.
rule download_vep_cache:
output:
calls="results/calls/annotated_calls.vcf", # .vcf, .vcf.gz or .bcf
stats="results/calls/variants.html",
params:
# Pass a list of plugins to use, see https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html
# Plugin args can be added as well, e.g. via an entry "MyPlugin,1,FOO", see docs.
plugins=["LoFtool"],
extra="--everything", # optional: extra arguments
expand(
"{dir}/{zip_name}",
dir=config["vep"]["cache_dir"],
zip_name=config["vep"]["zip_name"],
),
log:
"logs/vep/annotate.log",
threads: 4
wrapper:
"v3.12.1/bio/vep/annotate"
log_file="logs/vep/download_vep_cache.log",
params:
cache_url=lambda wc: config["vep"]["url"],
directory=config["vep"]["cache_dir"],
conda:
"../envs/curl.yml" # Updated to use a conda environment with curl
# --fail makes curl exit non-zero on an HTTP error (e.g. 404) instead of
# saving the server's error page as the archive and reporting success.
shell:
"""
mkdir -p {params.directory}
curl --fail -L -o {output} {params.cache_url} >> {log.log_file} 2>&1
"""


rule get_vep_cache:
# Unpack the downloaded VEP cache archive so that the declared output
# directory "{cache_dir}/{species}" exists afterwards. tar output (stdout and
# stderr) is appended to the rule's log file.
rule unzip_vep_cache:
input:
tar_file=expand(
"{dir}/{zip_name}",
dir=config["vep"]["cache_dir"],
zip_name=config["vep"]["zip_name"],
),
output:
directory(config["vep"]["cache_dir"]),
params:
species=config["vep"]["species"],
build=config["vep"]["build"],
release=config["vep"]["release"],
species_dir=directory("{cache_dir}/{species}".format(**config["vep"])),
log:
"logs/vep/cache.log",
cache: "omit-software" # save space and time with between workflow caching (see docs)
wrapper:
"v3.12.1/bio/vep/cache"
log_file="logs/vep/unzip_vep_cache.log",
conda:
"../envs/unzip.yml"
# Extract into the parent of the declared output (i.e. cache_dir) rather than
# the current working directory; otherwise the files never land where the
# output and the later --dir_cache argument expect them.
# Assumes the archive unpacks to a top-level "<species>/" directory — confirm.
shell:
"""
tar -xzvf {input.tar_file} -C "$(dirname {output.species_dir})" >> {log.log_file} 2>&1
"""


rule download_vep_plugins:
# Annotate the filtered calls with VEP in offline mode, reading the unpacked
# cache from config["vep"]["cache_dir"] and writing a VCF. Runs inside the
# container image named by config["vep"]["image"]; vep output (stdout and
# stderr) goes to the rule's log file.
rule vep_annotation:
input:
vcf="results/calls/filtered_calls.vcf",
dir="{cache_dir}/{species}".format(**config["vep"]),
output:
temp(directory(config["vep"]["plugins_dir"])),
annotated_vcf="results/calls/annotated_calls.vcf",
params:
release=config["vep"]["release"],
wrapper:
"v3.12.1/bio/vep/plugins"
cache_dir=lambda wc: config["vep"]["cache_dir"],
species=lambda wc: config["vep"]["species"],
container:
config["vep"]["image"]
resources:
cores=4,
log:
log_file="logs/vep/vep_annotation.log",
# --species takes the configured value instead of a hard-coded name, so it
# always matches the "{cache_dir}/{species}" directory required as input.
shell:
"""
vep --input_file {input.vcf} --output_file {output.annotated_vcf} --offline --vcf --species {params.species} \
    --cache --dir_cache {params.cache_dir} --force_overwrite --fork {resources.cores} > {log.log_file} 2>&1
"""

0 comments on commit 757940d

Please sign in to comment.