-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from RIVM-bioinformatics/create_rule
Add PopPUNK
- Loading branch information
Showing
21 changed files
with
191 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -133,3 +133,5 @@ dmypy.json | |
envs/src | ||
input | ||
output | ||
config/sample_sheet.yaml | ||
config/user_parameters.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
[submodule "juno-library"] | ||
[submodule "base_juno_pipeline"] | ||
path = base_juno_pipeline | ||
url = https://github.com/RIVM-bioinformatics/base_juno_pipeline.git | ||
url = https://github.com/RIVM-bioinformatics/juno-library.git | ||
branch = v0.9.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,22 @@ | ||
import yaml | ||
|
||
|
||
sample_sheet=config["sample_sheet"] | ||
sample_sheet = config["sample_sheet"] | ||
with open(sample_sheet) as f: | ||
SAMPLES = yaml.safe_load(f) | ||
|
||
print(SAMPLES) | ||
|
||
OUT = config["out"] | ||
|
||
|
||
localrules: | ||
all, | ||
|
||
|
||
include: "workflow/rules/rule.smk" | ||
include: "workflow/rules/aggregatePoppunkCsv.smk" | ||
include: "workflow/rules/createQfileFasta.smk" | ||
include: "workflow/rules/PopPUNK.smk" | ||
|
||
|
||
rule all: | ||
input: | ||
expand(OUT + "/{sample}_combined.fastq", sample=SAMPLES), | ||
expand(OUT + "/poppunk_clusters.csv"), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,9 @@ | ||
threads: | ||
template_rule: 1 | ||
create_Qfile: 1 | ||
fasta_popPUNK: 8 | ||
aggregatePoppunkCsv: 1 | ||
|
||
mem_gb: | ||
template_rule: 1 | ||
create_Qfile: 1 | ||
fasta_popPUNK: 1 | ||
aggregatePoppunkCsv: 1 |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
import pathlib | ||
|
||
# Common parent directory of the database locations listed below.
_POPPUNK_DB_ROOT = pathlib.Path("/mnt/db/juno/poppunk")

# Maps a species identifier to the directory of its reference database
# (presumably the PopPUNK database handed to the pipeline -- confirm with the
# code that consumes this mapping).
species_database_locations = {
    "streptococcus_pneumoniae": _POPPUNK_DB_ROOT
    / "streptococcus"
    / "GPS_v4_references",
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,4 +2,4 @@ name: mamba | |
channels: | ||
- conda-forge | ||
dependencies: | ||
- mamba | ||
- mamba==0.27 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
name: juno_population | ||
channels: | ||
- bioconda | ||
- conda-forge | ||
- anaconda | ||
- defaults | ||
dependencies: | ||
- git | ||
- mamba==0.27 | ||
- pandas | ||
- snakemake |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
name: poppunk | ||
channels: | ||
- bioconda | ||
- conda-forge | ||
- anaconda | ||
- defaults | ||
dependencies: | ||
- popPUNK | ||
# Joblib 1.2.0 breaks HDBscan clustering that is used by popPUNK. | ||
# Temporarily pin to v1.1, beware of vulnerability that triggered release of joblib v1.2 https://nvd.nist.gov/vuln/detail/CVE-2022-21797 | ||
- joblib==1.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
rule assign_popPUNK_cluster:
    """Run poppunk_assign on one sample's query file against the configured database."""
    input:
        OUT + "/q_files/{sample}_qfile.txt",
    output:
        # The directory itself is handed to poppunk_assign via --output; the
        # files below are declared inside that directory so other rules can
        # request them by name.
        output_dir=directory(OUT + "/results_per_sample/{sample}_poppunk/"),
        output_csv=OUT + "/results_per_sample/{sample}_poppunk/{sample}_poppunk_clusters.csv",
        output_pkl=OUT + "/results_per_sample/{sample}_poppunk/{sample}_poppunk.dists.pkl",
        output_npy=OUT + "/results_per_sample/{sample}_poppunk/{sample}_poppunk.dists.npy",
        output_h5=OUT + "/results_per_sample/{sample}_poppunk/{sample}_poppunk.h5",
    log:
        OUT + "/log/{sample}_poppunk.log",
    conda:
        "../envs/poppunk.yaml"
    message:
        "Running popPUNK clustering"
    params:
        db_dir=config["db_dir"],
    resources:
        mem_gb=config["mem_gb"]["fasta_popPUNK"],
    threads: config["threads"]["fasta_popPUNK"]
    # Only stderr is redirected into the log file; stdout is not captured.
    shell:
        """
        poppunk_assign \
        --db {params.db_dir} \
        --threads {threads} --query {input} --output {output.output_dir} 2> {log}
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
rule aggregate_poppunk_csv:
    """Merge the per-sample popPUNK cluster CSV files into a single CSV.

    Reads one `{sample}_poppunk_clusters.csv` per entry in SAMPLES, stacks the
    rows in that order and writes the result to `poppunk_clusters.csv`.
    """
    input:
        expand(
            OUT + "/results_per_sample/{sample}_poppunk/{sample}_poppunk_clusters.csv",
            sample=SAMPLES,
        ),
    output:
        OUT + "/poppunk_clusters.csv",
    log:
        OUT + "/log/summarize.log",
    message:
        "Merging individual popPUNK output to one csv."
    resources:
        mem_gb=config["mem_gb"]["aggregatePoppunkCsv"],
    threads: config["threads"]["aggregatePoppunkCsv"]
    run:
        import pandas as pd

        per_sample_tables = [pd.read_csv(csv_path) for csv_path in input]
        aggregated_csv = pd.concat(per_sample_tables, ignore_index=True)
        # The row index is only a synthetic counter created by
        # ignore_index=True; writing it would add a nameless first column to
        # the merged file, so it is left out.
        aggregated_csv.to_csv(output[0], index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
rule create_Qfile_fasta:
    """Create the query file popPUNK requires: a text file holding one
    tab-separated line with the sample ID and the absolute path of its fasta.

    The sample ID is the assembly's file name with a trailing `.fasta`
    removed; other extensions are left in place.
    """
    # TODO: popPUNK can also work from fastqs. This approach completely ignores this.
    input:
        lambda wc: SAMPLES[wc.sample]["assembly"],
    output:
        OUT + "/q_files/{sample}_qfile.txt",
    resources:
        mem_gb=config["mem_gb"]["create_Qfile"],
    threads: config["threads"]["create_Qfile"]
    # The file-derived values are passed as printf arguments rather than being
    # embedded in the format string, so a '%' or '\' in a file name is written
    # literally. Paths are quoted so names containing spaces survive.
    shell:
        """
        printf "%s\t%s\n" "$(basename "{input}" .fasta)" "$(realpath "{input}")" > "{output}"
        """
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.