Skip to content

Commit

Permalink
Merge pull request #34 from openvar/develop_2_0
Browse files Browse the repository at this point in the history
  • Loading branch information
Peter J. Freeman authored Aug 8, 2022
2 parents 726ebad + 9b107d4 commit 662bbe3
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 56 deletions.
17 changes: 1 addition & 16 deletions VariantFormatter/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,26 +261,13 @@ def fetch_aligned_transcripts(hgvs_genomic, transcript_model, vfo):
refseq_list = vfo.hdp.get_tx_for_region(hgvs_genomic.ac, 'splign', hgvs_genomic.posedit.pos.start.base - 1,
hgvs_genomic.posedit.pos.end.base)

# Keeping these print statements because they enable us to check UTA alignment errors
# print('\nIn A')
# print(refseq_list)
# print(hgvs_genomic)
# print('end')

# Transcript edge antisense! - If doesn't map, will be weeded out downstream!
if refseq_list == []:
refseq_list = vfo.hdp.get_tx_for_region(hgvs_genomic.ac, 'splign', hgvs_genomic.posedit.pos.start.base,
hgvs_genomic.posedit.pos.end.base - 1)

tx_list = tx_list + refseq_list

# Keeping these print statements because they enable us to check UTA alignment errors
# print(['gene', 'tx', 'chr', 'aln', 'ori', 'exon', 'tx_st', 'tx_end', 'chr_st', 'chr_end', 'cigar'])
# for tx in tx_list:
# exons = vfo.hdp.get_tx_exons(tx[0], tx[1], tx[3])
# for e in exons:
# print(e[0:11])

return tx_list


Expand Down Expand Up @@ -318,15 +305,13 @@ def remove_reference(hgvs_nucleotide):
"""


def gap_checker(hgvs_transcript, hgvs_genomic, un_norm_hgvs_genomic, genome_build, vfo):
def gap_checker(hgvs_transcript, hgvs_genomic, genome_build, vfo):

tx_id = hgvs_transcript.ac
if re.match('ENST', tx_id):
alt_aln_method = 'genebuild'
hn = vfo.genebuild_normalizer
rhn = vfo.reverse_genebuild_normalizer
elif re.match('NM', tx_id) or re.match('NR', tx_id):
alt_aln_method = 'splign'
hn = vfo.splign_normalizer
rhn = vfo.reverse_splign_normalizer

Expand Down
47 changes: 21 additions & 26 deletions VariantFormatter/gapGenes.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,14 @@
# -*- coding: utf-8 -*-

# Import python modules
# from distutils.version import StrictVersion
import re
# import copy
import vvhgvs.exceptions
import vvhgvs.assemblymapper
import vvhgvs.variantmapper

# VV
import VariantValidator
import VariantValidator.modules.seq_data as seq_data
# import VariantValidator.modules.hgvs_utils as va_H2V
import VariantValidator.modules.gapped_mapping
from VariantValidator.modules.variant import Variant
# vval = VariantValidator.Validator()


"""
Expand Down Expand Up @@ -45,29 +39,32 @@ def compensate_g_to_t(hgvs_tx,
hdp,
vfo):

# Set Variable
re_hash_hgvs_genomic = hgvs_genomic

# Not required in these instances
if re.match('ENST', hgvs_tx.ac): # or (StrictVersion(str(hgvs_version)) > StrictVersion('1.1.3') is True):
if re.match('ENST', hgvs_tx.ac): # or (StrictVersion(str(hgvs_version)) > StrictVersion('1.1.3') is True):

# Push to absolute position
normalized_tx = fully_normalize(hgvs_tx, hgvs_genomic, hn, reverse_normalizer,
hdp, vm, vfo)
vm, vfo)
hgvs_tx_returns = [normalized_tx, False, None, None, None]


else:
gene_symbol = hdp.get_tx_identity_info(hgvs_tx.ac)[6]

# Check the blacklist
gap_compensation = gap_black_list(gene_symbol)
# print('Is gapped = ' + str(gap_compensation))
if gap_compensation is False:
normalized_tx = fully_normalize(hgvs_tx, hgvs_genomic, hn,
reverse_normalizer, hdp, vm, vfo)
reverse_normalizer, vm, vfo)
hgvs_tx_returns = [normalized_tx, False, None, None, None]
else:

"""
Code now modified to use native VV gap mapper
"""

# VV functions require evm instances
no_norm_evm = vvhgvs.assemblymapper.AssemblyMapper(hdp,
assembly_name=primary_assembly,
Expand Down Expand Up @@ -96,12 +93,17 @@ def compensate_g_to_t(hgvs_tx,
variant.vm = vm
variant.primary_assembly = primary_assembly
variant.post_format_conversion = hgvs_genomic

# Map the gap
gap_mapper = VariantValidator.modules.gapped_mapping.GapMapper(variant, vfo)
data, nw_rel_var = gap_mapper.gapped_g_to_c([str(hgvs_tx)], select_transcripts_dict={})
ori = vfo.tx_exons(tx_ac=hgvs_tx.ac, alt_ac=hgvs_genomic.ac, alt_aln_method=vfo.alt_aln_method)
re_hash_hgvs_genomic, suppress_c_normalization, hgvs_coding = gap_mapper.g_to_t_compensation(ori,
nw_rel_var[0],
"")

# Populate output list
gap_compensated_tx_2 = []
gap_compensated_tx_2.append(nw_rel_var[0])
gap_compensated_tx_2 = [nw_rel_var[0]]
if "does not represent a true variant" in data["gapped_alignment_warning"] \
or "may be an artefact of" in data["gapped_alignment_warning"]:
gap_compensated_tx_2.append(True)
Expand All @@ -125,30 +127,29 @@ def compensate_g_to_t(hgvs_tx,

if gap_compensated_tx_2[1] is False:
refresh_hgvs_tx = fully_normalize(hgvs_tx, hgvs_genomic, hn,
reverse_normalizer, hdp, vm, vfo)
reverse_normalizer, vm, vfo)
gap_compensated_tx_2[0] = refresh_hgvs_tx
hgvs_tx_returns = gap_compensated_tx_2

# Create response dictionary for gap mapping output
hgvs_tx_dict = {'hgvs_transcript': hgvs_tx_returns[0],
# 'position_lock': hgvs_tx_returns[1],
'gapped_alignment_warning': hgvs_tx_returns[3],
'corrective_action': hgvs_tx_returns[2],
'gap_position': hgvs_tx_returns[-1],
'transcript_accession': hgvs_tx_returns[0].ac
'transcript_accession': hgvs_tx_returns[0].ac,
'hgvs_genomic': re_hash_hgvs_genomic
}

return hgvs_tx_dict


"""
Fully normalizes the hgvs_tx variant from the hgvs_genomic variant
Is only activated if the g_to_t_compensation_code IS NOT USED!
"""


def fully_normalize(hgvs_tx, hgvs_genomic, hn, reverse_normalizer, hdp, vm, vfo):
def fully_normalize(hgvs_tx, hgvs_genomic, hn, reverse_normalizer, vm, vfo):

# set required variables
tx_id = hgvs_tx.ac
Expand All @@ -167,19 +168,13 @@ def fully_normalize(hgvs_tx, hgvs_genomic, hn, reverse_normalizer, hdp, vm, vfo)
else:
hgvs_genomic = hn.normalize(hgvs_genomic)

# print('Try 1')
try:
hgvs_tx = vm.g_to_t(hgvs_genomic, hgvs_tx.ac)
except vvhgvs.exceptions.HGVSError as e:
# print('Error area 1')
# print(e)
except vvhgvs.exceptions.HGVSError:
pass
# print('Try 2')
try:
hgvs_tx = hn.normalize(hgvs_tx)
except vvhgvs.exceptions.HGVSError as e:
# print('Error area 2')
# print(e)
except vvhgvs.exceptions.HGVSError:
pass

return hgvs_tx
Expand Down
31 changes: 17 additions & 14 deletions VariantFormatter/variantformatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@
import VariantFormatter.formatter as formatter
# import VV
import VariantValidator.modules.liftover as lo

# Custom Exceptions
import VariantValidator.modules.gapped_mapping
from VariantValidator.modules.variant import Variant


# Custom Exceptions
class vcf2hgvsError(Exception):
pass

Expand All @@ -34,7 +35,7 @@ class variableError(Exception):
pass


# Create variantformatter object
# Create Genomic Descriptions object
class GenomicDescriptions(object):
"""
Object contains genomic level sequence variant descriptions in the pseudo vcf (p_vcf)
Expand Down Expand Up @@ -349,16 +350,9 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None

# Gap checking
try:
am_i_gapped = formatter.gap_checker(hgvs_transcript_dict['hgvs_transcript'], g_hgvs, un_norm_hgvs, self.genome_build, self.vfo)
except Exception as e:
# Error detection prime location!
# import sys
# import traceback
# exc_type, exc_value, last_traceback = sys.exc_info()
# te = traceback.format_exc()
# tbk = [str(exc_type), str(exc_value), str(te)]
# er = str('\n'.join(tbk))
# print(er)
am_i_gapped = formatter.gap_checker(hgvs_transcript_dict['hgvs_transcript'], g_hgvs,
self.genome_build, self.vfo)
except Exception:
self.warning_level = 'processing_error'
if hgvs_transcript_dict['error'] == '':
hgvs_transcript_dict['error'] = None
Expand Down Expand Up @@ -443,6 +437,11 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None
if (order_my_tp['transcript_variant_error'] is not None and g_to_g_lift == {}) or (
order_my_tp['transcript_variant_error'] is None):

if order_my_tp['t_hgvs'] is not None:
specified_tx_variant = formatter.parse(order_my_tp['t_hgvs'], self.vfo)
else:
specified_tx_variant = None

current_lift = lo.liftover(self.genomic_descriptions.g_hgvs,
self.genomic_descriptions.selected_build,
build_to,
Expand All @@ -451,8 +450,12 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None
None,
vfo,
specify_tx=tx_id,
liftover_level=self.liftover
liftover_level=self.liftover,
gap_map=formatter.gap_checker,
vfo=self.vfo,
specified_tx_variant=specified_tx_variant
)

if g_to_g_lift == {}:
g_to_g_lift = current_lift

Expand Down

0 comments on commit 662bbe3

Please sign in to comment.