diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index c9ff1e8d..b7daad5f 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -637,11 +637,21 @@ def intronic_converter(variant, validator, skip_check=False): # Add the edited variant for next stage error processing e.g. exon boundaries. variant.quibble = transy + # Expanding list of exceptions + try: + hgvs_transy = validator.hp.parse_hgvs_variant(transy) + except vvhgvs.exceptions.HGVSError: + # Allele syntax caught here + if "[" in transy and "]" in transy: + return genomic_ref + else: + raise + if skip_check is True: return genomic_ref else: # Check the specified base is correct - hgvs_genomic = validator.nr_vm.c_to_g(validator.hp.parse_hgvs_variant(transy), genomic_ref, + hgvs_genomic = validator.nr_vm.c_to_g(hgvs_transy, genomic_ref, alt_aln_method=validator.alt_aln_method) try: validator.vr.validate(hgvs_genomic) @@ -652,6 +662,19 @@ def intronic_converter(variant, validator, skip_check=False): else: validator.vr.validate(hgvs_genomic) + # Check re-mapping of intronic variants + if hgvs_transy.posedit.pos.start.offset != 0 or hgvs_transy.posedit.pos.end.offset != 0: + try: + check_intronic_mapping = validator.nr_vm.g_to_t(hgvs_genomic, hgvs_transy.ac, + alt_aln_method=validator.alt_aln_method) + if check_intronic_mapping.posedit.pos == hgvs_transy.posedit.pos: + pass + else: + variant.warnings.append(f'ExonBoundaryError: {hgvs_transy.posedit.pos} does not match the exon ' + f'boundaries for the alignment of {hgvs_transy.ac} to {hgvs_genomic.ac}') + except vvhgvs.exceptions.HGVSError: + pass + logger.debug("HVGS typesetting complete") diff --git a/VariantValidator/modules/hgvs_utils.py b/VariantValidator/modules/hgvs_utils.py index f43b732d..62618690 100644 --- a/VariantValidator/modules/hgvs_utils.py +++ b/VariantValidator/modules/hgvs_utils.py @@ -2277,15 +2277,21 @@ def hgvs_ref_alt(hgvs_variant, sf): ref_alt_dict = {'ref': ref, 'alt': alt} return ref_alt_dict -# -# def hgvs_to_delins(hgvs_variant_object): -# """ -# Refer to https://github.com/openvar/vv_hgvs/blob/master/examples/creating-a-variant.ipynb -# :param hgvs_variant_object: -# :return: hgvs_variant_object in raw type = "delins" state -# """ -# -# + +def incomplete_alignment_mapping_t_to_g(validator, variant): + output = None + mapping_options = validator.hdp.get_tx_mapping_options(variant.input_parses.ac) + for option in mapping_options: + if option[2] == validator.alt_aln_method and "NC_" not in option[1]: + in_assembly = seq_data.to_chr_num_refseq(option[1], variant.primary_assembly) + if in_assembly is not None: + try: + output = validator.vm.t_to_g(variant.input_parses, option[1]) + if variant.input_parses.posedit.edit.type == "identity": + output.posedit.edit.alt = output.posedit.edit.ref + except vvhgvs.exceptions.HGVSError: + pass + return output # # Copyright (C) 2016-2024 VariantValidator Contributors diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 4b6bcf13..fe805c22 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -240,13 +240,12 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version # Se rec_var to '' so it can be updated later rec_var = '' - # First task is to get the genomic equivalent, and pri nt useful error messages if it can't be found. + # First task is to get the genomic equivalent, and print useful error messages if it can't be found. try: to_g = validator.myevm_t_to_g(obj, variant.no_norm_evm, variant.primary_assembly, variant.hn) genomic_ac = to_g.ac except vvhgvs.exceptions.HGVSDataNotAvailableError as e: errors = [] - if ('~' in str(e) and 'Alignment is incomplete' in str(e)) or "No relevant genomic mapping options" in str(e): # Unable to map the input variant onto a genomic position if '~' in str(e) and 'Alignment is incomplete' in str(e): @@ -312,7 +311,19 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version else: # Normalize was I believe to replace ref. Mapping does this anyway # to_g = variant.hn.normalize(to_g) - formatted_variant = str(validator.myevm_g_to_t(variant.evm, to_g, tx_ac)) + try: + formatted_variant = str(validator.myevm_g_to_t(variant.evm, to_g, tx_ac)) + except vvhgvs.exceptions.HGVSError as e: + if "Alignment is incomplete" in str(e): + to_g = hgvs_utils.incomplete_alignment_mapping_t_to_g(validator, variant) + if to_g is None: + error = str(e) + variant.warnings.append(error) + logger.warning(error) + return True + else: + variant.hgvc_genomic = to_g + formatted_variant = str(validator.vm.g_to_t(to_g, tx_ac)) elif ':g.' in quibble_input: if plus.search(formatted_variant) or minus.search(formatted_variant): @@ -388,7 +399,6 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version # hgvs will handle incorrect coordinates so need to automap errors # Make sure any input intronic coordinates are correct # Get the desired transcript - if cck: # This should only ever hit coding variants (RNA has been converted to c by now) if 'del' in formatted_variant: @@ -414,9 +424,20 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version try: post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) except vvhgvs.exceptions.HGVSError as error: - variant.warnings.append(str(error)) - logger.warning(str(error)) - return True + if "Alignment is incomplete" in str(error): + output = hgvs_utils.incomplete_alignment_mapping_t_to_g(validator, variant) + if output is None: + error = str(error) + variant.warnings.append(error) + logger.warning(error) + return True + else: + post_var = validator.vm.g_to_t(output, tx_ac) + variant.hgvs_genomic = output + else: + variant.warnings.append(str(error)) + logger.warning(str(error)) + return True test = validator.hp.parse_hgvs_variant(quibble_input) if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \ @@ -467,7 +488,20 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version variant.hn) # genome back to C coordinates - post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) + try: + post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) + except vvhgvs.exceptions.HGVSError as e: + if "Alignment is incomplete" in str(e): + pre_var = hgvs_utils.incomplete_alignment_mapping_t_to_g(validator, variant) + if to_g is None: + error = str(e) + variant.warnings.append(error) + logger.warning(error) + return True + else: + post_var = validator.vm.g_to_t(pre_var, tx_ac) + variant.hgvs_genomic = pre_var + test = validator.hp.parse_hgvs_variant(quibble_input) if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \ post_var.posedit.pos.end.base != test.posedit.pos.end.base: @@ -590,7 +624,12 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version logger.debug("gap_compensation_1 = " + str(gap_compensation)) # Genomic sequence - hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn) + if variant.hgvs_genomic is not None: + hgvs_genomic = variant.hgvs_genomic + else: + hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn) + + # Create gap_mapper object instance gap_mapper = gapped_mapping.GapMapper(variant, validator) @@ -875,12 +914,12 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant, liftover_level= ori = validator.tx_exons(tx_ac=variant.hgvs_coding.ac, alt_ac=alt_chr, alt_aln_method=validator.alt_aln_method) + # Map the variant to the genome hgvs_alt_genomic = validator.myvm_t_to_g(variant.hgvs_coding, alt_chr, variant.no_norm_evm, variant.hn) - gap_mapper = gapped_mapping.GapMapper(variant, validator) - # Loop out gap code under these circumstances! if gap_compensation: + gap_mapper = gapped_mapping.GapMapper(variant, validator) hgvs_alt_genomic, hgvs_coding = gap_mapper.g_to_t_gap_compensation_version3( hgvs_alt_genomic, variant.hgvs_coding, ori, alt_chr, rec_var) variant.hgvs_coding = hgvs_coding diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index d4127ee1..d1cda229 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -5,6 +5,7 @@ import logging from . import utils as fn import copy +from . import hgvs_utils logger = logging.getLogger(__name__) @@ -25,6 +26,7 @@ def refseq_common_mistakes(variant): variant.warnings.append(error) logger.warning(error) return True + # NR_ c. if variant.transcript_type == "n" and variant.reftype == ':c.': suggestion = variant.quibble.replace(':c.', ':n.') @@ -33,6 +35,7 @@ def refseq_common_mistakes(variant): variant.warnings.append(error) logger.warning(error) return True + # NM_ n. if variant.transcript_type == "c" and variant.reftype == ':n.': suggestion = variant.quibble.replace(':n.', ':c.') @@ -74,7 +77,6 @@ def structure_checks(variant, validator): variant.input_parses = input_parses variant.gene_symbol = validator.db.get_gene_symbol_from_transcript_id(variant.input_parses.ac) - if variant.gene_symbol == 'none': variant.gene_symbol = '' if input_parses.type == 'g' or input_parses.type == 'm': @@ -188,7 +190,6 @@ def structure_checks_c(variant, validator): validator.vr.validate(variant.input_parses) except vvhgvs.exceptions.HGVSError as e: error = str(e) - if 'datums is ill-defined' in error: called_ref = variant.input_parses.posedit.edit.ref @@ -470,13 +471,22 @@ def structure_checks_c(variant, validator): variant.warnings.append(error) logger.warning(error) return True + try: variant.evm.g_to_t(output, variant.input_parses.ac) except vvhgvs.exceptions.HGVSError as e: - error = str(e) - variant.warnings.append(error) - logger.warning(error) - return True + if "Alignment is incomplete" in str(e): + output = hgvs_utils.incomplete_alignment_mapping_t_to_g(validator, variant) + if output is None: + error = str(e) + variant.warnings.append(error) + logger.warning(error) + return True + else: + error = str(e) + variant.warnings.append(error) + logger.warning(error) + return True # Check that the reference is correct by direct mapping without replacing reference check_ref_g = variant.no_replace_vm.t_to_g(variant.input_parses, output.ac, diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index bcd66ebb..426e39bd 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -1636,7 +1636,6 @@ def validate(self, for vt in variant.warnings: vt = str(vt) - # Do not warn transcript not part of build if it's not the relevant transcript if "is not part of genome build" in vt and term not in vt: continue diff --git a/tests/test_allele_syntax.py b/tests/test_allele_syntax.py index efa4bb79..81e8842e 100644 --- a/tests/test_allele_syntax.py +++ b/tests/test_allele_syntax.py @@ -93,3 +93,19 @@ def test_variant11(self): "so should be described as NM_000059.4:c.1917_1929delinsTGCATTCTTCTGT" in results["validation_warning_1"]["validation_warnings"]) +# +# Copyright (C) 2016-2024 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# \ No newline at end of file diff --git a/tests/test_alternate_alignments.py b/tests/test_alternate_alignments.py new file mode 100644 index 00000000..2b71e5b8 --- /dev/null +++ b/tests/test_alternate_alignments.py @@ -0,0 +1,90 @@ +from VariantValidator import Validator +from unittest import TestCase + + +class TestAltAlignments(TestCase): + + @classmethod + def setup_class(cls): + cls.vv = Validator() + cls.vv.testing = True + + def test_sub(self): + variant = 'NW_011332691.1(NM_012234.6):c.335+1G>C' + results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True) + print(results) + assert "NM_012234.6:c.335+1G>C" in list(results.keys()) + assert results["NM_012234.6:c.335+1G>C"][ + "genome_context_intronic_sequence"] == "NW_011332691.1(NM_012234.6):c.335+1G>C" + assert results["NM_012234.6:c.335+1G>C"]["alt_genomic_loci"][0]["grch38"][ + "hgvs_genomic_description"] == "NW_011332691.1:g.53828C>G" + + def test_del(self): + variant = 'NW_011332691.1(NM_012234.6):c.335+1del' + results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True) + print(results) + assert "NM_012234.6:c.335+1del" in list(results.keys()) + assert "NW_011332691.1(NM_012234.6):c.335+1del" in results[ + "NM_012234.6:c.335+1del"]["genome_context_intronic_sequence"] + assert "NW_011332691.1:g.53828del" in results["NM_012234.6:c.335+1del"]["alt_genomic_loci"][0]["grch38"][ + "hgvs_genomic_description"] + + def test_delins(self): + variant = "NW_011332691.1(NM_012234.6):c.335+1delinsAT" + results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True) + print(results) + assert "NM_012234.6:c.335+1delinsAT" in list(results.keys()) + assert "NW_011332691.1(NM_012234.6):c.335+1delinsAT" in results[ + "NM_012234.6:c.335+1delinsAT"]["genome_context_intronic_sequence"] + assert "NW_011332691.1:g.53828delinsAT" in results["NM_012234.6:c.335+1delinsAT"]["alt_genomic_loci"][ + 0]["grch38"]["hgvs_genomic_description"] + + def test_inv(self): + variant = "NW_011332691.1(NM_012234.6):c.335+1_335+6inv" + results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True) + print(results) + assert "NM_012234.6:c.335+1_335+6inv" in list(results.keys()) + assert "NW_011332691.1(NM_012234.6):c.335+1_335+6inv" in results[ + "NM_012234.6:c.335+1_335+6inv"]["genome_context_intronic_sequence"] + assert "NW_011332691.1:g.53823_53828inv" in results["NM_012234.6:c.335+1_335+6inv"]["alt_genomic_loci"][0][ + "grch38"]["hgvs_genomic_description"] + + def test_dup(self): + variant = "NW_011332691.1(NM_012234.6):c.335+1dup" + results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True) + print(results) + assert "NM_012234.6:c.335+1dup" in list(results.keys()) + assert "NW_011332691.1(NM_012234.6):c.335+1dup" in results[ + "NM_012234.6:c.335+1dup"]["genome_context_intronic_sequence"] + assert "NW_011332691.1:g.53828dup" in results["NM_012234.6:c.335+1dup"]["alt_genomic_loci"][0]["grch38"][ + "hgvs_genomic_description"] + + def test_identity(self): + variant = "NW_011332691.1(NM_012234.6):c.335+1_335+6=" + results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True) + print(results) + assert "NM_012234.6:c.335+1_335+6=" in list(results.keys()) + assert "NW_011332691.1(NM_012234.6):c.335+1_335+6=" in results[ + "NM_012234.6:c.335+1_335+6="]["genome_context_intronic_sequence"] + assert "NW_011332691.1:g.53823_53828=" in results["NM_012234.6:c.335+1_335+6="]["alt_genomic_loci"][0]["grch38"][ + "hgvs_genomic_description"] + +# +# Copyright (C) 2016-2024 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# + + +