Skip to content

Commit

Permalink
Add tweaks to genes2transcripts to handle gene symbols that are updat…
Browse files Browse the repository at this point in the history
…ing and HGNC genes with no transcript info openvar/rest_variantValidator#186 and also handle the longer deletions in #651
  • Loading branch information
Peter-J-Freeman committed Dec 4, 2024
1 parent dbfb7b1 commit 033d948
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 12 deletions.
19 changes: 15 additions & 4 deletions VariantValidator/modules/gapped_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ def gapped_g_to_c(self, rel_var, select_transcripts_dict):
self.validator.hp,
self.validator.vm,
self.validator.merge_hgvs_3pr,
genomic_ac=hgvs_genomic_variant.ac,)
genomic_ac=hgvs_genomic_variant.ac)

if vcf__dict['needs_a_push'] is True:
needs_a_push = True
Expand Down Expand Up @@ -688,7 +688,8 @@ def gapped_g_to_c(self, rel_var, select_transcripts_dict):
hgvs_not_delins = saved_hgvs_coding
self.disparity_deletion_in = ['false', 'false']
elif 'Normalization of intronic variants is not supported' in error:
# We know that this cannot be because of an intronic variant, so must be aligned to tx gap
# We know that this cannot be because of an intronic variant, so must be aligned to
# tx gap
self.disparity_deletion_in = ['transcript', 'Requires Analysis']
logger.info(error)

Expand All @@ -704,7 +705,6 @@ def gapped_g_to_c(self, rel_var, select_transcripts_dict):

# GAP IN THE TRANSCRIPT DISPARITY DETECTED
if self.disparity_deletion_in[0] == 'transcript':

# Check for issue https://github.com/openvar/variantValidator/issues/385 where the gap is
# being identified but oddly the vm is not compensating, likely due to odd sequence
try:
Expand All @@ -724,7 +724,8 @@ def gapped_g_to_c(self, rel_var, select_transcripts_dict):
# the actual length should be == gen_len_difference - 1 not == gen_len_difference
if tx_len_difference - self.disparity_deletion_in[1] == gen_len_difference:
# So here we know we need to knock off disparity_deletion_in[1] bases
if len(hgvs_not_delins.posedit.edit.alt) == len(self.tx_hgvs_not_delins.posedit.edit.alt):
if len(hgvs_not_delins.posedit.edit.alt) == len(
self.tx_hgvs_not_delins.posedit.edit.alt):
if self.orientation == 1:
self.tx_hgvs_not_delins.posedit.edit.ref = hgvs_not_delins.posedit.ref
else:
Expand Down Expand Up @@ -2815,6 +2816,7 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_

if self.tx_hgvs_not_delins.posedit.pos.start.offset == 0 and \
self.tx_hgvs_not_delins.posedit.pos.end.offset == 0:

# In this instance, we have identified a transcript gap, but the n. version of
# the transcript variant does not have a position which actually hits the gap,
# so the variant likely spans the gap and is not picked up by an offset.
Expand All @@ -2833,6 +2835,15 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_
hgvs_refreshed_variant = self.tx_hgvs_not_delins
return hgvs_refreshed_variant

elif (((g3.posedit.pos.end.base - g3.posedit.pos.start.base) >
(hgvs_genomic_norm.posedit.pos.end.base - hgvs_genomic_norm.posedit.pos.start.base)) and
hgvs_genomic_norm.posedit.edit.type == 'del' and
g3.posedit.pos.start.base < hgvs_genomic_norm.posedit.pos.start.base and
(g3.posedit.pos.end.base + int(self.disparity_deletion_in[1])) <
hgvs_genomic_norm.posedit.pos.end.base):
hgvs_refreshed_variant = self.tx_hgvs_not_delins
return hgvs_refreshed_variant

g3.posedit.pos.end.base = g3.posedit.pos.start.base + (len(g3.posedit.edit.ref) - 1)
try:
c2 = self.validator.vm.g_to_t(g3, c1.ac, alt_aln_method=self.validator.alt_aln_method)
Expand Down
30 changes: 23 additions & 7 deletions VariantValidator/modules/gene2transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from . import utils as fn
from . import seq_data


# Pre compile variables
vvhgvs.global_config.formatting.max_ref_length = 1000000

Expand Down Expand Up @@ -53,11 +54,15 @@ def gene2transcripts(g2t, query, validator=False, bypass_web_searches=False, sel
query = g2t.db.get_stable_gene_id_from_hgnc_id(query)[1]
if query == "No data":
try:
query = validator.db.get_transcripts_from_annotations(store_query)
for tx in query:
if tx[5] != "unassigned":
query = tx[5]
break
query = g2t.db.get_transcripts_from_annotations(store_query)
if "none" not in query[0]:
for tx in query:
if tx[5] != "unassigned":
query = tx[5]
break
else:
return {'error': 'Unable to recognise HGNC ID. Please provide a gene symbol',
"requested_symbol": store_query}
except TypeError:
pass

Expand Down Expand Up @@ -170,10 +175,21 @@ def gene2transcripts(g2t, query, validator=False, bypass_web_searches=False, sel
hgnc_id = vvta_record[0][0]
previous_sym = hgnc
symbol_identified = True
if len(vvta_record) > 1:
elif len(vvta_record) > 1:
return {'error': '%s is a previous symbol for %s genes. '
'Refer to https://www.genenames.org/' % (current_sym, str(len(vvta_record))),
'Refer to https://www.genenames.org/' % (hgnc, str(len(vvta_record))),
"requested_symbol": query}
else:
# Is it an updated symbol?
old_symbol = g2t.db.get_uta_symbol(hgnc)
if old_symbol is not None:
vvta_record = g2t.hdp.get_gene_info(old_symbol)
if vvta_record is not None:
current_sym = hgnc
gene_name = vvta_record[3]
hgnc_id = vvta_record[0]
previous_sym = old_symbol
symbol_identified = True

if symbol_identified is False:
return {'error': 'Unable to recognise gene symbol %s' % hgnc,
Expand Down
2 changes: 1 addition & 1 deletion VariantValidator/modules/hgvs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1058,7 +1058,7 @@ def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, reverse_normalizer,

# Look for flank subs that may be missed when naive mapping c > c made a delins from a sub
# This is a hgvs.py quirk for flanking subs in the antisense orientation and refers to
#
# https://github.com/openvar/variantValidator/issues/651
try:
normalized_end_seq_check_mapped = hn.normalize(end_seq_check_mapped)
normalized_end_seq_check_variant = hn.normalize(end_seq_check_variant)
Expand Down
7 changes: 7 additions & 0 deletions tests/test_gene2transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,13 @@ def test_symbol_valid_hgnc_id(self):
results = self.vv.gene2transcripts(symbol)
assert results["current_symbol"] == "COL1A1"

def test_symbol_invalid_hgnc_id(self):
    """Check the error payload returned for the HGNC ID 'HGNC:12029'.

    The response is expected to carry the "Unable to recognise HGNC ID"
    error message and to echo the submitted identifier back under
    ``requested_symbol``.
    """
    symbol = 'HGNC:12029'
    results = self.vv.gene2transcripts(symbol)
    # No debugging print here: pytest already shows the compared values
    # when an assertion fails.
    assert results["error"] == "Unable to recognise HGNC ID. Please provide a gene symbol"
    assert results["requested_symbol"] == symbol

def test_multiple_genes(self):
symbol = '["HGNC:2197", "COL1A1", "2197"]'
results = self.vv.gene2transcripts(symbol)
Expand Down

0 comments on commit 033d948

Please sign in to comment.