Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle TIDDIT style SV type names #128

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ About changelog [here](https://keepachangelog.com/en/1.0.0/)
### Fixed
- Restore command accepts custom database name
- Restore command uses either database URI or host:port params
- TIDDIT-style variants (IDUP, TDUP or DUP:TANDEM appearing in the SVTYPE field as well, not only in the ALT field) now collapse to their primary type (DUP, DEL)

## [2.5.2]

Expand Down
5 changes: 5 additions & 0 deletions loqusdb/build_models/variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ def get_coords(variant):
coordinates["end"] = end

sv_type = variant.INFO.get("SVTYPE")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add type hint

if sv_type:
sv_type = sv_type.split(":")[0]
if sv_type in ["TDUP", "IDUP"]:
sv_type = "DUP"

length = variant.INFO.get("SVLEN")
sv_len = abs(length) if length else end - pos
# Translocations will sometimes have an end chrom that differs from chrom
Expand Down
26 changes: 26 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,32 @@ def duptandem_variant(request):
info_dict={"END": 3092849, "SVLEN": 223, "SVTYPE": "DUP"},
)

@pytest.fixture(scope="function")
def duptandem_tiddit_variant(request):
    """Provide a tandem duplication written in TIDDIT notation.

    Both the ALT allele and the SVTYPE INFO entry carry the
    ``DUP:TANDEM`` subtype rather than the plain ``DUP`` type.
    """
    sv_info = {"END": 3092849, "SVLEN": 223, "SVTYPE": "DUP:TANDEM"}
    tandem_dup = CyvcfVariant(
        chrom="1",
        ref="A",
        alt="<DUP:TANDEM>",
        pos=3092626,
        end=3092849,
        var_type="sv",
        info_dict=sv_info,
    )
    return tandem_dup


@pytest.fixture(scope="function")
def duptandem_old_tiddit_variant(request):
    """Provide a tandem duplication written in the older TIDDIT notation.

    Both the ALT allele and the SVTYPE INFO entry use the ``TDUP``
    name instead of ``DUP``.
    """
    sv_info = {"END": 3092849, "SVLEN": 223, "SVTYPE": "TDUP"}
    legacy_tandem_dup = CyvcfVariant(
        chrom="1",
        ref="A",
        alt="<TDUP>",
        pos=3092626,
        end=3092849,
        var_type="sv",
        info_dict=sv_info,
    )
    return legacy_tandem_dup



@pytest.fixture(scope="function")
def translocation_variant(request):
Expand Down
38 changes: 38 additions & 0 deletions tests/vcf_tools/test_format_sv_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,44 @@ def test_format_dup_tandem(duptandem_variant, case_obj):
assert formated_variant["alt"] == variant.ALT[0]
assert formated_variant["sv_type"] == "DUP"

def test_format_tiddit_dup_tandem(duptandem_tiddit_variant, case_obj):
## GIVEN a TIDDIT-style notation, i.e. both the SVTYPE info field and the ALT field tag have a subtype, e.g. DUP:TANDEM
variant = duptandem_tiddit_variant
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
variant = duptandem_tiddit_variant
variant: str = duptandem_tiddit_variant

case_id = case_obj["case_id"]
## WHEN parsing the variant
formated_variant = build_variant(variant=variant, case_obj=case_obj, case_id=case_id)

## THEN assert the sv is parsed correctly
assert formated_variant["chrom"] == variant.CHROM
assert formated_variant["end_chrom"] == variant.CHROM
assert formated_variant["pos"] == variant.POS
assert formated_variant["end"] == variant.INFO["END"]
assert formated_variant["sv_len"] == abs(variant.INFO["SVLEN"])

assert formated_variant["ref"] == variant.REF
assert formated_variant["alt"] == variant.ALT[0]
assert formated_variant["sv_type"] == "DUP"


def test_format_old_tiddit_dup_tandem(duptandem_old_tiddit_variant, case_obj):
## GIVEN a dup with older TIDDIT notation (TDUP, IDUP, ...)
variant = duptandem_old_tiddit_variant
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
variant = duptandem_old_tiddit_variant
variant: str = duptandem_tiddit_variant_legacy_format

case_id = case_obj["case_id"]
## WHEN parsing the variant
formated_variant = build_variant(variant=variant, case_obj=case_obj, case_id=case_id)

## THEN assert the sv is parsed correctly
assert formated_variant["chrom"] == variant.CHROM
assert formated_variant["end_chrom"] == variant.CHROM
assert formated_variant["pos"] == variant.POS
assert formated_variant["end"] == variant.INFO["END"]
assert formated_variant["sv_len"] == abs(variant.INFO["SVLEN"])

assert formated_variant["ref"] == variant.REF
assert formated_variant["alt"] == variant.ALT[0]
assert formated_variant["sv_type"] == "DUP"



def test_format_translocation(translocation_variant, case_obj):
## GIVEN a small insertion (This means that the insertion is included in ALT field)
Expand Down
Loading