Skip to content

Commit

Permalink
feat: include previous clustering
Browse files Browse the repository at this point in the history
  • Loading branch information
boasvdp committed Jun 28, 2024
1 parent 00f759e commit f2767d0
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 8 deletions.
4 changes: 4 additions & 0 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ for param in ["threads", "mem_gb"]:

OUT = config["output_dir"]

# TODO (presumably not implemented yet): find the collection using collfinder,
# then fetch it with iget and save it as "previous_clustering" in the working directory.
PREVIOUS_CLUSTERING = "previous_clustering"

# Configure pipeline outputs
expected_outputs = []

Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/clustering.smk
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ rule clustering:
message:
"Clustering {input.distances} with threshold {params.threshold}"
resources:
mem_gb=config["mem_gb"]["compression"],
mem_gb=config["mem_gb"]["clustering"],
conda:
"../envs/clustering.yaml"
container:
Expand Down
12 changes: 7 additions & 5 deletions workflow/rules/combine_snp_profiles.smk
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

rule decompress_snp_profiles:
input:
INPUT + "/aln.fa.gz",
PREVIOUS_CLUSTERING + "/aln.fa.gz",
output:
temp(OUT + "/old_aln.fa"),
log:
Expand All @@ -15,7 +15,7 @@ rule decompress_snp_profiles:
conda:
"../envs/scripts.yaml"
container:
""
"docker://ghcr.io/boasvdp/juno_clustering_scripts:0.1"
params:
script="workflow/scripts/script.py",
threads: config["threads"]["compression"]
Expand All @@ -40,13 +40,15 @@ rule add_snp_profiles:
message:
"Adding SNP profiles to {input.previous_aln}."
resources:
mem_gb=config["mem_gb"]["add_snp_profiles"],
mem_gb=config["mem_gb"]["compression"],
conda:
"../envs/scripts.yaml"
container:
""
"docker://ghcr.io/boasvdp/juno_clustering_scripts:0.1"
threads: config["threads"]["compression"]
shell:
"""
# TODO: find a better way of combining samples, e.g. make sure there are no duplicate names
cat {input.previous_aln} {input.assemblies} > {output}
"""

Expand All @@ -65,7 +67,7 @@ rule compress_snp_profiles:
conda:
"../envs/scripts.yaml"
container:
""
"docker://ghcr.io/boasvdp/juno_clustering_scripts:0.1"
threads: config["threads"]["compression"]
shell:
"""
Expand Down
11 changes: 9 additions & 2 deletions workflow/rules/distance_calculation.smk
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,20 @@ rule distance_calculation_snp:
output:
OUT + "/distances.tsv",
conda:
""
"../envs/distance_calculation.yaml"
container:
"docker://ghcr.io/boasvdp/distle:0.1.0"
params:
max_distance=config["max_distance"],
output_mode="full",
resources:
mem_gb=config["mem_gb"]["distance_calculation"],
log:
OUT + "/log/distance_calculation_snp.log",
threads: config["threads"]["distance_calculation"]
shell:
"""
# TODO: check whether the distances have already been calculated and only calculate the missing ones; this could be added to distle
distle \
--verbose \
--input-format fasta \
Expand All @@ -31,14 +35,17 @@ rule distance_calculation_cgmlst:
output:
OUT + "/distances.tsv",
conda:
""
"../envs/distance_calculation.yaml"
container:
"docker://ghcr.io/boasvdp/distle:0.1.0"
params:
max_distance=config["max_distance"],
output_mode="full",
resources:
mem_gb=config["mem_gb"]["distance_calculation"],
log:
OUT + "/log/distance_calculation_cgmlst.log",
threads: config["threads"]["distance_calculation"]
shell:
"""
distle \
Expand Down

0 comments on commit f2767d0

Please sign in to comment.