diff --git a/VariantFormatter/formatter.py b/VariantFormatter/formatter.py index 6197b3c..d130780 100644 --- a/VariantFormatter/formatter.py +++ b/VariantFormatter/formatter.py @@ -261,12 +261,6 @@ def fetch_aligned_transcripts(hgvs_genomic, transcript_model, vfo): refseq_list = vfo.hdp.get_tx_for_region(hgvs_genomic.ac, 'splign', hgvs_genomic.posedit.pos.start.base - 1, hgvs_genomic.posedit.pos.end.base) - # Keeping these print statements because they enable us to check UTA alignment errors - # print('\nIn A') - # print(refseq_list) - # print(hgvs_genomic) - # print('end') - # Transcript edge antisense! - If doesn't map, will be weeded out downstream! if refseq_list == []: refseq_list = vfo.hdp.get_tx_for_region(hgvs_genomic.ac, 'splign', hgvs_genomic.posedit.pos.start.base, @@ -274,13 +268,6 @@ def fetch_aligned_transcripts(hgvs_genomic, transcript_model, vfo): tx_list = tx_list + refseq_list - # Keeping these print statements because they enable us to check UTA alignment errors - # print(['gene', 'tx', 'chr', 'aln', 'ori', 'exon', 'tx_st', 'tx_end', 'chr_st', 'chr_end', 'cigar']) - # for tx in tx_list: - # exons = vfo.hdp.get_tx_exons(tx[0], tx[1], tx[3]) - # for e in exons: - # print(e[0:11]) - return tx_list @@ -318,15 +305,13 @@ def remove_reference(hgvs_nucleotide): """ -def gap_checker(hgvs_transcript, hgvs_genomic, un_norm_hgvs_genomic, genome_build, vfo): +def gap_checker(hgvs_transcript, hgvs_genomic, genome_build, vfo): tx_id = hgvs_transcript.ac if re.match('ENST', tx_id): - alt_aln_method = 'genebuild' hn = vfo.genebuild_normalizer rhn = vfo.reverse_genebuild_normalizer elif re.match('NM', tx_id) or re.match('NR', tx_id): - alt_aln_method = 'splign' hn = vfo.splign_normalizer rhn = vfo.reverse_splign_normalizer diff --git a/VariantFormatter/gapGenes.py b/VariantFormatter/gapGenes.py index 11e69fd..10a3784 100644 --- a/VariantFormatter/gapGenes.py +++ b/VariantFormatter/gapGenes.py @@ -1,20 +1,14 @@ # -*- coding: utf-8 -*- # Import python modules -# from distutils.version import StrictVersion import re -# import copy import vvhgvs.exceptions import vvhgvs.assemblymapper import vvhgvs.variantmapper - -# VV import VariantValidator import VariantValidator.modules.seq_data as seq_data -# import VariantValidator.modules.hgvs_utils as va_H2V import VariantValidator.modules.gapped_mapping from VariantValidator.modules.variant import Variant -# vval = VariantValidator.Validator() """ @@ -45,29 +39,32 @@ def compensate_g_to_t(hgvs_tx, hdp, vfo): + # Set Variable + re_hash_hgvs_genomic = hgvs_genomic + # Not required in these instances - if re.match('ENST', hgvs_tx.ac): # or (StrictVersion(str(hgvs_version)) > StrictVersion('1.1.3') is True): + if re.match('ENST', hgvs_tx.ac): # or (StrictVersion(str(hgvs_version)) > StrictVersion('1.1.3') is True): # Push to absolute position normalized_tx = fully_normalize(hgvs_tx, hgvs_genomic, hn, reverse_normalizer, - hdp, vm, vfo) + vm, vfo) hgvs_tx_returns = [normalized_tx, False, None, None, None] + else: gene_symbol = hdp.get_tx_identity_info(hgvs_tx.ac)[6] + # Check the blacklist gap_compensation = gap_black_list(gene_symbol) - # print('Is gapped = ' + str(gap_compensation)) if gap_compensation is False: normalized_tx = fully_normalize(hgvs_tx, hgvs_genomic, hn, - reverse_normalizer, hdp, vm, vfo) + reverse_normalizer, vm, vfo) hgvs_tx_returns = [normalized_tx, False, None, None, None] else: """ Code now modified to use native VV gap mapper """ - # VV functions require evm instances no_norm_evm = vvhgvs.assemblymapper.AssemblyMapper(hdp, assembly_name=primary_assembly, @@ -96,12 +93,17 @@ def compensate_g_to_t(hgvs_tx, variant.vm = vm variant.primary_assembly = primary_assembly variant.post_format_conversion = hgvs_genomic + + # Map the gap gap_mapper = VariantValidator.modules.gapped_mapping.GapMapper(variant, vfo) data, nw_rel_var = gap_mapper.gapped_g_to_c([str(hgvs_tx)], select_transcripts_dict={}) + ori = vfo.tx_exons(tx_ac=hgvs_tx.ac, alt_ac=hgvs_genomic.ac, alt_aln_method=vfo.alt_aln_method) + re_hash_hgvs_genomic, suppress_c_normalization, hgvs_coding = gap_mapper.g_to_t_compensation(ori, + nw_rel_var[0], + "") # Populate output list - gap_compensated_tx_2 = [] - gap_compensated_tx_2.append(nw_rel_var[0]) + gap_compensated_tx_2 = [nw_rel_var[0]] if "does not represent a true variant" in data["gapped_alignment_warning"] \ or "may be an artefact of" in data["gapped_alignment_warning"]: gap_compensated_tx_2.append(True) @@ -125,17 +127,17 @@ def compensate_g_to_t(hgvs_tx, if gap_compensated_tx_2[1] is False: refresh_hgvs_tx = fully_normalize(hgvs_tx, hgvs_genomic, hn, - reverse_normalizer, hdp, vm, vfo) + reverse_normalizer, vm, vfo) gap_compensated_tx_2[0] = refresh_hgvs_tx hgvs_tx_returns = gap_compensated_tx_2 # Create response dictionary for gap mapping output hgvs_tx_dict = {'hgvs_transcript': hgvs_tx_returns[0], - # 'position_lock': hgvs_tx_returns[1], 'gapped_alignment_warning': hgvs_tx_returns[3], 'corrective_action': hgvs_tx_returns[2], 'gap_position': hgvs_tx_returns[-1], - 'transcript_accession': hgvs_tx_returns[0].ac + 'transcript_accession': hgvs_tx_returns[0].ac, + 'hgvs_genomic': re_hash_hgvs_genomic } return hgvs_tx_dict @@ -143,12 +145,11 @@ def compensate_g_to_t(hgvs_tx, """ Fully normalizes the hgvs_tx variant from the hgvs_genomic variant - Is only activated if the g_to_t_compensation_code IS NOT USED! """ -def fully_normalize(hgvs_tx, hgvs_genomic, hn, reverse_normalizer, hdp, vm, vfo): +def fully_normalize(hgvs_tx, hgvs_genomic, hn, reverse_normalizer, vm, vfo): # set required variables tx_id = hgvs_tx.ac @@ -167,19 +168,13 @@ def fully_normalize(hgvs_tx, hgvs_genomic, hn, reverse_normalizer, hdp, vm, vfo) else: hgvs_genomic = hn.normalize(hgvs_genomic) - # print('Try 1') try: hgvs_tx = vm.g_to_t(hgvs_genomic, hgvs_tx.ac) - except vvhgvs.exceptions.HGVSError as e: - # print('Error area 1') - # print(e) + except vvhgvs.exceptions.HGVSError: pass - # print('Try 2') try: hgvs_tx = hn.normalize(hgvs_tx) - except vvhgvs.exceptions.HGVSError as e: - # print('Error area 2') - # print(e) + except vvhgvs.exceptions.HGVSError: pass return hgvs_tx diff --git a/VariantFormatter/variantformatter.py b/VariantFormatter/variantformatter.py index 936675e..d8a3ac6 100644 --- a/VariantFormatter/variantformatter.py +++ b/VariantFormatter/variantformatter.py @@ -18,10 +18,11 @@ import VariantFormatter.formatter as formatter # import VV import VariantValidator.modules.liftover as lo - -# Custom Exceptions +import VariantValidator.modules.gapped_mapping +from VariantValidator.modules.variant import Variant +# Custom Exceptions class vcf2hgvsError(Exception): pass @@ -34,7 +35,7 @@ class variableError(Exception): pass -# Create variantformatter object +# Create Genomic Descriptions object class GenomicDescriptions(object): """ Object contains genomic level sequence variant descriptions in the pseudo vcf (p_vcf) @@ -349,16 +350,9 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None # Gap checking try: - am_i_gapped = formatter.gap_checker(hgvs_transcript_dict['hgvs_transcript'], g_hgvs, un_norm_hgvs, self.genome_build, self.vfo) - except Exception as e: - # Error detection prime location! - # import sys - # import traceback - # exc_type, exc_value, last_traceback = sys.exc_info() - # te = traceback.format_exc() - # tbk = [str(exc_type), str(exc_value), str(te)] - # er = str('\n'.join(tbk)) - # print(er) + am_i_gapped = formatter.gap_checker(hgvs_transcript_dict['hgvs_transcript'], g_hgvs, + self.genome_build, self.vfo) + except Exception: self.warning_level = 'processing_error' if hgvs_transcript_dict['error'] == '': hgvs_transcript_dict['error'] = None @@ -443,6 +437,11 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None if (order_my_tp['transcript_variant_error'] is not None and g_to_g_lift == {}) or ( order_my_tp['transcript_variant_error'] is None): + if order_my_tp['t_hgvs'] is not None: + specified_tx_variant = formatter.parse(order_my_tp['t_hgvs'], self.vfo) + else: + specified_tx_variant = None + current_lift = lo.liftover(self.genomic_descriptions.g_hgvs, self.genomic_descriptions.selected_build, build_to, @@ -451,8 +450,12 @@ def __init__(self, variant_description, genome_build, vfo, transcript_model=None None, vfo, specify_tx=tx_id, - liftover_level=self.liftover + liftover_level=self.liftover, + gap_map=formatter.gap_checker, + vfo=self.vfo, + specified_tx_variant=specified_tx_variant ) + if g_to_g_lift == {}: g_to_g_lift = current_lift