Skip to content

Commit

Permalink
Merge pull request #13 from RIVM-bioinformatics/fix_localrule
Browse files (browse the repository at this point in the history)
Fix localrule
  • Loading branch information
boasvdp authored Jun 3, 2024
2 parents 79b041e + 7872ba3 commit def3c64
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 20 deletions.
2 changes: 1 addition & 1 deletion Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ localrules:
copy_ref_gff,
aggregate_species,
no_typing,
cauris_extract_amr_mutations,
cauris_extract_aa_mutations,
combine_auriclas,


Expand Down
19 changes: 7 additions & 12 deletions tests/test_amr_mutation_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,14 +160,14 @@ def test_combine_exact_matches_and_possible_cnvs(self):

class TestAaComparison(unittest.TestCase):
df_resistance_genes_correct = pd.read_csv(
"tests/test_files/df_resistance_genes_correct.tsv", sep="\t"
"tests/test_files/df_resistance_genes_correct.tsv", sep="\t", dtype={"AF": str}
)

def test_read_input_file(self):
df_mutations_test_read_input_correct = pd.read_csv(
"tests/test_files/df_mutations_test_read_input_correct.tsv",
sep="\t",
dtype={"AF": float},
dtype={"AF": str},
)
df_mutations_test_read_input = read_input_file(
Path("tests/test_files/df_mutations_test_read_input.tsv")
Expand All @@ -180,12 +180,6 @@ def test_read_input_file(self):
df_mutations_test_read_input_correct[
"type"
] = df_mutations_test_read_input_correct["type"].fillna("NA")
df_mutations_test_read_input.to_csv(
"tests/test_files/inspect1.tsv", sep="\t", index=False
)
df_mutations_test_read_input_correct.to_csv(
"tests/test_files/inspect2.tsv", sep="\t", index=False
)
self.assertTrue(
df_mutations_test_read_input.equals(df_mutations_test_read_input_correct)
)
Expand All @@ -201,13 +195,11 @@ def test_create_locus_tag_gene_dict(self):
)

def test_filter_for_resistance_genes(self):
# df_resistance_genes_correct = pd.read_csv(
# "tests/test_files/df_resistance_genes_correct.tsv", sep="\t"
# )
df_mutations_parsed = read_input_file(
Path("tests/test_files/df_mutations_test_read_input.tsv")
)
df_resistance_genes_correct_copy = self.df_resistance_genes_correct.copy()

df_resistance_genes = filter_for_resistance_genes(
df_mutations=df_mutations_parsed,
dict_locus_tag_gene={"b0001": "gene A"},
Expand All @@ -225,12 +217,15 @@ def test_merge_resistance_genes_with_ref(self):
resistance_variants_csv=df_aa_resistance_variants,
)
df_resistance_with_impact_correct = pd.read_csv(
"tests/test_files/df_resistance_with_impact_correct.tsv", sep="\t"
"tests/test_files/df_resistance_with_impact_correct.tsv",
sep="\t",
dtype={"AF": str},
)
self.assertEqual(df_resistance_with_impact.shape[0], 2)
self.assertEqual(df_resistance_with_impact.shape[1], 14)
df_resistance_with_impact.reset_index(drop=True, inplace=True)
df_resistance_with_impact_correct.reset_index(drop=True, inplace=True)

self.assertTrue(
df_resistance_with_impact.equals(df_resistance_with_impact_correct)
)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_files/df_resistance_genes_correct.tsv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CHROM POS TYPE REF ALT DP AF type locus_tag mutation_name ref_aa alt_aa genetic_element
NC_000913.3 100 SNP A T 100 1.0 missense b0001 10E>10K 10E 10K gene A
NC_000913.3 200 SNP A T 100 1.0 synonymous b0001 20S 20S gene A
NC_000913.3 100 SNP A T 100 1 missense b0001 10E>10K 10E 10K gene A
NC_000913.3 200 SNP A T 100 1 synonymous b0001 20S 20S gene A
4 changes: 2 additions & 2 deletions tests/test_files/df_resistance_with_impact_correct.tsv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CHROM POS TYPE REF ALT DP AF type locus_tag mutation_name ref_aa alt_aa genetic_element impact
NC_000913.3 100 SNP A T 100 1.0 missense b0001 10E>10K 10E 10K gene A resistance
NC_000913.3 200 SNP A T 100 1.0 synonymous b0001 20S 20S gene A
NC_000913.3 100 SNP A T 100 1 missense b0001 10E>10K 10E 10K gene A resistance
NC_000913.3 200 SNP A T 100 1 synonymous b0001 20S 20S gene A
4 changes: 2 additions & 2 deletions workflow/rules/cauris_typing.smk
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ rule cauris_extract_aa_mutations:
message:
"Extract AMR mutations for {wildcards.sample}"
log:
OUT + "/log/cauris_extract_amr_mutations/{sample}.log",
OUT + "/log/cauris_compare_aa_mutations/{sample}.log",
shell:
"""
python workflow/scripts/extract_amr_mutations.py \
python workflow/scripts/compare_aa_mutations.py \
--input {input.tsv} \
--output {output.tsv} \
--full-output {output.full} \
Expand Down
5 changes: 4 additions & 1 deletion workflow/scripts/compare_aa_mutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,11 @@ def read_input_file(input_file: Path) -> pd.DataFrame:
# Read lines into pandas dataframe
df_input = pd.DataFrame([line.split("\t") for line in lines[1:]])
df_input.columns = lines[0].rstrip("\n").split("\t")
# if AF contains a string like 0.5,0.5 convert to two rows for this record with AF 0.5
# df_input = df_input.assign(AF=df_input["AF"].str.split(",")).explode("AF")
# Set dtypes
df_input = df_input.astype({"POS": int, "DP": int, "AF": float})
# df_input = df_input.astype({"POS": int, "DP": int, "AF": float})
df_input = df_input.astype({"POS": int, "DP": int, "AF": str})
df_input[["type", "locus_tag", "mutation_name"]] = df_input["BCSQ"].str.split(
"|", expand=True
)[[0, 1, 5]]
Expand Down

0 comments on commit def3c64

Please sign in to comment.