diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d70e04..2ddd755 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ About changelog [here](https://keepachangelog.com/en/1.0.0/) ### Fixed - Restore command accepts custom database name - Restore command uses either database URI or host:port params +- TIDDIT-style variant subtypes (IDUP, TDUP or DUP:TANDEM) given in the SVTYPE field, not only in the ALT field, collapse to their primary type (DUP, DEL) ## [2.5.2] diff --git a/loqusdb/build_models/variant.py b/loqusdb/build_models/variant.py index ed0ac46..969ebba 100644 --- a/loqusdb/build_models/variant.py +++ b/loqusdb/build_models/variant.py @@ -98,6 +98,11 @@ def get_coords(variant): coordinates["end"] = end sv_type = variant.INFO.get("SVTYPE") + if sv_type: + sv_type = sv_type.split(":")[0] + if sv_type in ["TDUP", "IDUP"]: + sv_type = "DUP" + length = variant.INFO.get("SVLEN") sv_len = abs(length) if length else end - pos # Translocations will sometimes have a end chrom that differs from chrom diff --git a/tests/conftest.py b/tests/conftest.py index dc079b9..6cc5e59 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -424,6 +424,32 @@ def duptandem_variant(request): info_dict={"END": 3092849, "SVLEN": 223, "SVTYPE": "DUP"}, ) +@pytest.fixture(scope="function") +def duptandem_tiddit_variant(request): + return CyvcfVariant( + chrom="1", + ref="A", + alt="", + pos=3092626, + end=3092849, + var_type="sv", + info_dict={"END": 3092849, "SVLEN": 223, "SVTYPE": "DUP:TANDEM"}, + ) + + +@pytest.fixture(scope="function") +def duptandem_old_tiddit_variant(request): + return CyvcfVariant( + chrom="1", + ref="A", + alt="", + pos=3092626, + end=3092849, + var_type="sv", + info_dict={"END": 3092849, "SVLEN": 223, "SVTYPE": "TDUP"}, + ) + + @pytest.fixture(scope="function") def translocation_variant(request): diff --git a/tests/vcf_tools/test_format_sv_variant.py b/tests/vcf_tools/test_format_sv_variant.py index e4e3036..3a465d3 100644 --- a/tests/vcf_tools/test_format_sv_variant.py +++ 
b/tests/vcf_tools/test_format_sv_variant.py @@ -82,6 +82,44 @@ def test_format_dup_tandem(duptandem_variant, case_obj): assert formated_variant["alt"] == variant.ALT[0] assert formated_variant["sv_type"] == "DUP" +def test_format_tiddit_dup_tandem(duptandem_tiddit_variant, case_obj): + ## GIVEN a variant in TIDDIT-style notation, i.e. both the SVTYPE info and the ALT field tag have a subtype, e.g. DUP:TANDEM + variant = duptandem_tiddit_variant + case_id = case_obj["case_id"] + ## WHEN parsing the variant + formated_variant = build_variant(variant=variant, case_obj=case_obj, case_id=case_id) + + ## THEN assert the sv is parsed correctly + assert formated_variant["chrom"] == variant.CHROM + assert formated_variant["end_chrom"] == variant.CHROM + assert formated_variant["pos"] == variant.POS + assert formated_variant["end"] == variant.INFO["END"] + assert formated_variant["sv_len"] == abs(variant.INFO["SVLEN"]) + + assert formated_variant["ref"] == variant.REF + assert formated_variant["alt"] == variant.ALT[0] + assert formated_variant["sv_type"] == "DUP" + + +def test_format_old_tiddit_dup_tandem(duptandem_old_tiddit_variant, case_obj): + ## GIVEN a dup with older TIDDIT notation (TDUP, IDUP, ...) + variant = duptandem_old_tiddit_variant + case_id = case_obj["case_id"] + ## WHEN parsing the variant + formated_variant = build_variant(variant=variant, case_obj=case_obj, case_id=case_id) + + ## THEN assert the sv is parsed correctly + assert formated_variant["chrom"] == variant.CHROM + assert formated_variant["end_chrom"] == variant.CHROM + assert formated_variant["pos"] == variant.POS + assert formated_variant["end"] == variant.INFO["END"] + assert formated_variant["sv_len"] == abs(variant.INFO["SVLEN"]) + + assert formated_variant["ref"] == variant.REF + assert formated_variant["alt"] == variant.ALT[0] + assert formated_variant["sv_type"] == "DUP" + + def test_format_translocation(translocation_variant, case_obj): ## GIVEN a small insertion (This means that the insertion is included in ALT field)