Skip to content

Commit

Permalink
Merge pull request #9 from fcaretti/Add-rules
Browse files Browse the repository at this point in the history
feat(rule): GATK's HaplotypeCaller
  • Loading branch information
fcaretti authored Jun 21, 2024
2 parents 50cf5b7 + 61e1aa5 commit c49124c
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 8 deletions.
19 changes: 13 additions & 6 deletions config/config.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
data:
folder: "/home/federico/Desktop/RNA_SNPs_calling/data"
folder: "data_folder"

reference:
folder: "/home/federico/Desktop/RNA_SNPs_calling/data/reference"
genome: "GRCh38.primary_assembly.genome.fa"
folder: "reference_folder"
genome: "genome.fa"

known_sites:
folder: "/home/federico/Desktop/RNA_SNPs_calling/data/reference"
filename: "resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf"
folder: "known_sites_folder"
filename: "ex: resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf"

filtering:
params: "-i 'QUAL > 30 && INFO/DP > 10'"
params: "ex: -i 'QUAL > 30 && INFO/DP > 10'"

vep:
cache_dir: "cache_folder"
plugins_dir: "plugins_folder"
species: ""
build: ""
release: ""
1 change: 1 addition & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ second_summaries = [
# Target paths derived from `samples` (defined outside this excerpt); each list
# holds one path per sample.
# Per-sample VCFs under results/calls/.
calls = [f"results/calls/{sample}.vcf" for sample in samples]
# Per-sample BAM files under results/recal/ and their .bai companions.
alns = [f"results/recal/{sample}.bam" for sample in samples]
idxs = [f"results/recal/{sample}.bai" for sample in samples]
# Per-sample files under results/calls_gatk/: plain VCF, gzip-suffixed VCF, and CSI index.
vcfs = [f"results/calls_gatk/{sample}.vcf" for sample in samples]
vcf_zips = [f"results/calls_gatk/{sample}.vcf.gz" for sample in samples]
# NOTE(review): the index path ends in ".vcf.csi", not ".vcf.gz.csi" — confirm it
# matches the output name declared by the rule that builds the index.
vcf_idxs = [f"results/calls_gatk/{sample}.vcf.csi" for sample in samples]
# Single call-set path; same path the annotate_variants rule takes as its `calls` input.
haplo_calls = "results/calls/calls_gatk.vcf"
Expand Down
3 changes: 1 addition & 2 deletions workflow/rules/gatk_haplocaller.smk
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@ rule haplotype_caller:
"v3.12.1/bio/gatk/haplotypecaller"



rule bgzip:
input:
"results/calls_gatk/{sample}.vcf",
output:
temp("results/calls_gatk/{sample}.vcf.gz"),
params:
extra="", # optional
extra="", # optional
threads: 1
log:
"logs/bgzip/{sample}.log",
Expand Down
43 changes: 43 additions & 0 deletions workflow/rules/vep.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Annotate the GATK call set with Ensembl VEP via the snakemake-wrappers
# "bio/vep/annotate" wrapper, producing an annotated VCF plus an HTML report.
rule annotate_variants:
    input:
        # Variant calls to annotate; the wrapper accepts .vcf, .vcf.gz or .bcf.
        calls="results/calls/calls_gatk.vcf",
        # VEP cache directory; may be left out when fasta and gff are both given.
        cache=config["vep"]["cache_dir"],
        # Directory holding the VEP plugins.
        plugins=config["vep"]["plugins_dir"],
        # Reference genome and its index (both names are defined outside this rule file).
        fasta=reference,
        fai=reference_idx,
    output:
        # Annotated calls; .vcf, .vcf.gz or .bcf are supported by the wrapper.
        calls="results/calls/annotated_calls.vcf",
        stats="results/calls/variants.html",
    threads: 4
    log:
        "logs/vep/annotate.log",
    params:
        # Plugins to enable, as a list. Plugin arguments can be appended to an
        # entry, e.g. "MyPlugin,1,FOO". Available plugins are listed at
        # https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html
        plugins=["LoFtool"],
        # Optional additional command-line arguments for VEP.
        extra="--everything",
    wrapper:
        "v3.12.1/bio/vep/annotate"


# Fetch the VEP annotation cache into the directory configured under
# config["vep"]["cache_dir"], using the "bio/vep/cache" wrapper.
rule get_vep_cache:
    output:
        directory(config["vep"]["cache_dir"]),
    log:
        "logs/vep/cache.log",
    params:
        # Species, genome build and Ensembl release all come from the "vep"
        # section of the workflow configuration.
        species=config["vep"]["species"],
        build=config["vep"]["build"],
        release=config["vep"]["release"],
    # Between-workflow caching saves space and time (see the Snakemake docs).
    cache: "omit-software"
    wrapper:
        "v3.12.1/bio/vep/cache"


# Download the VEP plugins for the configured release into
# config["vep"]["plugins_dir"], using the "bio/vep/plugins" wrapper.
rule download_vep_plugins:
    output:
        # The plugin directory is declared as a temp() output.
        temp(directory(config["vep"]["plugins_dir"])),
    params:
        # Release number is taken from the "vep" section of the configuration.
        release=config["vep"]["release"],
    wrapper:
        "v3.12.1/bio/vep/plugins"

0 comments on commit c49124c

Please sign in to comment.