Skip to content

Commit

Permalink
Update the code to accept intronic variants in transcripts with alter…
Browse files Browse the repository at this point in the history
…nate alignments in patches vs the primary assembly. Issue #657
  • Loading branch information
Peter-J-Freeman committed Dec 5, 2024
1 parent 033d948 commit 45f5d07
Show file tree
Hide file tree
Showing 7 changed files with 211 additions and 28 deletions.
25 changes: 24 additions & 1 deletion VariantValidator/modules/format_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,11 +637,21 @@ def intronic_converter(variant, validator, skip_check=False):

# Add the edited variant for next stage error processing e.g. exon boundaries.
variant.quibble = transy
# Expanding list of exceptions
try:
hgvs_transy = validator.hp.parse_hgvs_variant(transy)
except vvhgvs.exceptions.HGVSError:
# Allele syntax caught here
if "[" in transy and "]" in transy:
return genomic_ref
else:
raise

if skip_check is True:
return genomic_ref
else:
# Check the specified base is correct
hgvs_genomic = validator.nr_vm.c_to_g(validator.hp.parse_hgvs_variant(transy), genomic_ref,
hgvs_genomic = validator.nr_vm.c_to_g(hgvs_transy, genomic_ref,
alt_aln_method=validator.alt_aln_method)
try:
validator.vr.validate(hgvs_genomic)
Expand All @@ -652,6 +662,19 @@ def intronic_converter(variant, validator, skip_check=False):
else:
validator.vr.validate(hgvs_genomic)

# Check re-mapping of intronic variants
if hgvs_transy.posedit.pos.start.offset != 0 or hgvs_transy.posedit.pos.end.offset != 0:
try:
check_intronic_mapping = validator.nr_vm.g_to_t(hgvs_genomic, hgvs_transy.ac,
alt_aln_method=validator.alt_aln_method)
if check_intronic_mapping.posedit.pos == hgvs_transy.posedit.pos:
pass
else:
variant.warnings.append(f'ExonBoundaryError: {hgvs_transy.posedit.pos} does not match the exon '
f'boundaries for the alignment of {hgvs_transy.ac} to {hgvs_genomic.ac}')
except vvhgvs.exceptions.HGVSError:
pass

logger.debug("HVGS typesetting complete")


Expand Down
24 changes: 15 additions & 9 deletions VariantValidator/modules/hgvs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2277,15 +2277,21 @@ def hgvs_ref_alt(hgvs_variant, sf):
ref_alt_dict = {'ref': ref, 'alt': alt}
return ref_alt_dict

#
# def hgvs_to_delins(hgvs_variant_object):
# """
# Refer to https://github.com/openvar/vv_hgvs/blob/master/examples/creating-a-variant.ipynb
# :param hgvs_variant_object:
# :return: hgvs_variant_object in raw type = "delins" state
# """
#
#

def incomplete_alignment_mapping_t_to_g(validator, variant):
output = None
mapping_options = validator.hdp.get_tx_mapping_options(variant.input_parses.ac)
for option in mapping_options:
if option[2] == validator.alt_aln_method and "NC_" not in option[1]:
in_assembly = seq_data.to_chr_num_refseq(option[1], variant.primary_assembly)
if in_assembly is not None:
try:
output = validator.vm.t_to_g(variant.input_parses, option[1])
if variant.input_parses.posedit.edit.type == "identity":
output.posedit.edit.alt = output.posedit.edit.ref
except vvhgvs.exceptions.HGVSError:
pass
return output

# <LICENSE>
# Copyright (C) 2016-2024 VariantValidator Contributors
Expand Down
61 changes: 50 additions & 11 deletions VariantValidator/modules/mappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,12 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
# Se rec_var to '' so it can be updated later
rec_var = ''

# First task is to get the genomic equivalent, and pri nt useful error messages if it can't be found.
# First task is to get the genomic equivalent, and print useful error messages if it can't be found.
try:
to_g = validator.myevm_t_to_g(obj, variant.no_norm_evm, variant.primary_assembly, variant.hn)
genomic_ac = to_g.ac
except vvhgvs.exceptions.HGVSDataNotAvailableError as e:
errors = []

if ('~' in str(e) and 'Alignment is incomplete' in str(e)) or "No relevant genomic mapping options" in str(e):
# Unable to map the input variant onto a genomic position
if '~' in str(e) and 'Alignment is incomplete' in str(e):
Expand Down Expand Up @@ -312,7 +311,19 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
else:
# Normalize was I believe to replace ref. Mapping does this anyway
# to_g = variant.hn.normalize(to_g)
formatted_variant = str(validator.myevm_g_to_t(variant.evm, to_g, tx_ac))
try:
formatted_variant = str(validator.myevm_g_to_t(variant.evm, to_g, tx_ac))
except vvhgvs.exceptions.HGVSError as e:
if "Alignment is incomplete" in str(e):
to_g = hgvs_utils.incomplete_alignment_mapping_t_to_g(validator, variant)
if to_g is None:
error = str(e)
variant.warnings.append(error)
logger.warning(error)
return True
else:
variant.hgvc_genomic = to_g
formatted_variant = str(validator.vm.g_to_t(to_g, tx_ac))

elif ':g.' in quibble_input:
if plus.search(formatted_variant) or minus.search(formatted_variant):
Expand Down Expand Up @@ -388,7 +399,6 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
# hgvs will handle incorrect coordinates so need to automap errors
# Make sure any input intronic coordinates are correct
# Get the desired transcript

if cck:
# This should only ever hit coding variants (RNA has been converted to c by now)
if 'del' in formatted_variant:
Expand All @@ -414,9 +424,20 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
try:
post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc)
except vvhgvs.exceptions.HGVSError as error:
variant.warnings.append(str(error))
logger.warning(str(error))
return True
if "Alignment is incomplete" in str(error):
output = hgvs_utils.incomplete_alignment_mapping_t_to_g(validator, variant)
if output is None:
error = str(error)
variant.warnings.append(error)
logger.warning(error)
return True
else:
post_var = validator.vm.g_to_t(output, tx_ac)
variant.hgvs_genomic = output
else:
variant.warnings.append(str(error))
logger.warning(str(error))
return True

test = validator.hp.parse_hgvs_variant(quibble_input)
if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \
Expand Down Expand Up @@ -467,7 +488,20 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
variant.hn)

# genome back to C coordinates
post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc)
try:
post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc)
except vvhgvs.exceptions.HGVSError as e:
if "Alignment is incomplete" in str(e):
pre_var = hgvs_utils.incomplete_alignment_mapping_t_to_g(validator, variant)
if to_g is None:
error = str(e)
variant.warnings.append(error)
logger.warning(error)
return True
else:
post_var = validator.vm.g_to_t(pre_var, tx_ac)
variant.hgvs_genomic = pre_var

test = validator.hp.parse_hgvs_variant(quibble_input)
if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \
post_var.posedit.pos.end.base != test.posedit.pos.end.base:
Expand Down Expand Up @@ -590,7 +624,12 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
logger.debug("gap_compensation_1 = " + str(gap_compensation))

# Genomic sequence
hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn)
if variant.hgvs_genomic is not None:
hgvs_genomic = variant.hgvs_genomic
else:
hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn)



# Create gap_mapper object instance
gap_mapper = gapped_mapping.GapMapper(variant, validator)
Expand Down Expand Up @@ -875,12 +914,12 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant, liftover_level=
ori = validator.tx_exons(tx_ac=variant.hgvs_coding.ac, alt_ac=alt_chr,
alt_aln_method=validator.alt_aln_method)

# Map the variant to the genome
hgvs_alt_genomic = validator.myvm_t_to_g(variant.hgvs_coding, alt_chr, variant.no_norm_evm, variant.hn)

gap_mapper = gapped_mapping.GapMapper(variant, validator)

# Loop out gap code under these circumstances!
if gap_compensation:
gap_mapper = gapped_mapping.GapMapper(variant, validator)
hgvs_alt_genomic, hgvs_coding = gap_mapper.g_to_t_gap_compensation_version3(
hgvs_alt_genomic, variant.hgvs_coding, ori, alt_chr, rec_var)
variant.hgvs_coding = hgvs_coding
Expand Down
22 changes: 16 additions & 6 deletions VariantValidator/modules/use_checking.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
from . import utils as fn
import copy
from . import hgvs_utils

logger = logging.getLogger(__name__)

Expand All @@ -25,6 +26,7 @@ def refseq_common_mistakes(variant):
variant.warnings.append(error)
logger.warning(error)
return True

# NR_ c.
if variant.transcript_type == "n" and variant.reftype == ':c.':
suggestion = variant.quibble.replace(':c.', ':n.')
Expand All @@ -33,6 +35,7 @@ def refseq_common_mistakes(variant):
variant.warnings.append(error)
logger.warning(error)
return True

# NM_ n.
if variant.transcript_type == "c" and variant.reftype == ':n.':
suggestion = variant.quibble.replace(':n.', ':c.')
Expand Down Expand Up @@ -74,7 +77,6 @@ def structure_checks(variant, validator):
variant.input_parses = input_parses
variant.gene_symbol = validator.db.get_gene_symbol_from_transcript_id(variant.input_parses.ac)


if variant.gene_symbol == 'none':
variant.gene_symbol = ''
if input_parses.type == 'g' or input_parses.type == 'm':
Expand Down Expand Up @@ -188,7 +190,6 @@ def structure_checks_c(variant, validator):
validator.vr.validate(variant.input_parses)
except vvhgvs.exceptions.HGVSError as e:
error = str(e)

if 'datums is ill-defined' in error:
called_ref = variant.input_parses.posedit.edit.ref

Expand Down Expand Up @@ -470,13 +471,22 @@ def structure_checks_c(variant, validator):
variant.warnings.append(error)
logger.warning(error)
return True

try:
variant.evm.g_to_t(output, variant.input_parses.ac)
except vvhgvs.exceptions.HGVSError as e:
error = str(e)
variant.warnings.append(error)
logger.warning(error)
return True
if "Alignment is incomplete" in str(e):
output = hgvs_utils.incomplete_alignment_mapping_t_to_g(validator, variant)
if output is None:
error = str(e)
variant.warnings.append(error)
logger.warning(error)
return True
else:
error = str(e)
variant.warnings.append(error)
logger.warning(error)
return True

# Check that the reference is correct by direct mapping without replacing reference
check_ref_g = variant.no_replace_vm.t_to_g(variant.input_parses, output.ac,
Expand Down
1 change: 0 additions & 1 deletion VariantValidator/modules/vvMixinCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -1636,7 +1636,6 @@ def validate(self,
for vt in variant.warnings:
vt = str(vt)


# Do not warn transcript not part of build if it's not the relevant transcript
if "is not part of genome build" in vt and term not in vt:
continue
Expand Down
16 changes: 16 additions & 0 deletions tests/test_allele_syntax.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,19 @@ def test_variant11(self):
"so should be described as NM_000059.4:c.1917_1929delinsTGCATTCTTCTGT" in
results["validation_warning_1"]["validation_warnings"])

# <LICENSE>
# Copyright (C) 2016-2024 VariantValidator Contributors
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# </LICENSE>
90 changes: 90 additions & 0 deletions tests/test_alternate_alignments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from VariantValidator import Validator
from unittest import TestCase


class TestAltAlignments(TestCase):

@classmethod
def setup_class(cls):
cls.vv = Validator()
cls.vv.testing = True

def test_sub(self):
variant = 'NW_011332691.1(NM_012234.6):c.335+1G>C'
results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True)
print(results)
assert "NM_012234.6:c.335+1G>C" in list(results.keys())
assert results["NM_012234.6:c.335+1G>C"][
"genome_context_intronic_sequence"] == "NW_011332691.1(NM_012234.6):c.335+1G>C"
assert results["NM_012234.6:c.335+1G>C"]["alt_genomic_loci"][0]["grch38"][
"hgvs_genomic_description"] == "NW_011332691.1:g.53828C>G"

def test_del(self):
variant = 'NW_011332691.1(NM_012234.6):c.335+1del'
results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True)
print(results)
assert "NM_012234.6:c.335+1del" in list(results.keys())
assert "NW_011332691.1(NM_012234.6):c.335+1del" in results[
"NM_012234.6:c.335+1del"]["genome_context_intronic_sequence"]
assert "NW_011332691.1:g.53828del" in results["NM_012234.6:c.335+1del"]["alt_genomic_loci"][0]["grch38"][
"hgvs_genomic_description"]

def test_delins(self):
variant = "NW_011332691.1(NM_012234.6):c.335+1delinsAT"
results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True)
print(results)
assert "NM_012234.6:c.335+1delinsAT" in list(results.keys())
assert "NW_011332691.1(NM_012234.6):c.335+1delinsAT" in results[
"NM_012234.6:c.335+1delinsAT"]["genome_context_intronic_sequence"]
assert "NW_011332691.1:g.53828delinsAT" in results["NM_012234.6:c.335+1delinsAT"]["alt_genomic_loci"][
0]["grch38"]["hgvs_genomic_description"]

def test_inv(self):
variant = "NW_011332691.1(NM_012234.6):c.335+1_335+6inv"
results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True)
print(results)
assert "NM_012234.6:c.335+1_335+6inv" in list(results.keys())
assert "NW_011332691.1(NM_012234.6):c.335+1_335+6inv" in results[
"NM_012234.6:c.335+1_335+6inv"]["genome_context_intronic_sequence"]
assert "NW_011332691.1:g.53823_53828inv" in results["NM_012234.6:c.335+1_335+6inv"]["alt_genomic_loci"][0][
"grch38"]["hgvs_genomic_description"]

def test_dup(self):
variant = "NW_011332691.1(NM_012234.6):c.335+1dup"
results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True)
print(results)
assert "NM_012234.6:c.335+1dup" in list(results.keys())
assert "NW_011332691.1(NM_012234.6):c.335+1dup" in results[
"NM_012234.6:c.335+1dup"]["genome_context_intronic_sequence"]
assert "NW_011332691.1:g.53828dup" in results["NM_012234.6:c.335+1dup"]["alt_genomic_loci"][0]["grch38"][
"hgvs_genomic_description"]

def test_identity(self):
variant = "NW_011332691.1(NM_012234.6):c.335+1_335+6="
results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True)
print(results)
assert "NM_012234.6:c.335+1_335+6=" in list(results.keys())
assert "NW_011332691.1(NM_012234.6):c.335+1_335+6=" in results[
"NM_012234.6:c.335+1_335+6="]["genome_context_intronic_sequence"]
assert "NW_011332691.1:g.53823_53828=" in results["NM_012234.6:c.335+1_335+6="]["alt_genomic_loci"][0]["grch38"][
"hgvs_genomic_description"]

# <LICENSE>
# Copyright (C) 2016-2024 VariantValidator Contributors
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# </LICENSE>



0 comments on commit 45f5d07

Please sign in to comment.